diff --git a/find_tokens_in_training_data.ipynb b/find_tokens_in_training_data.ipynb index 23d2e6e..e7f571b 100644 --- a/find_tokens_in_training_data.ipynb +++ b/find_tokens_in_training_data.ipynb @@ -54,7 +54,7 @@ "# group phrases derived from the same docs\n", "for p in phrases:\n", " count = sum(s['content'].count(p) for s in finds[p])\n", - " print(f\"Phrase {repr(p):<40} found in {len(finds[p])}/{len(ds['train'])} samples\\t hash {hashes[p]}\\t {count} occurences\")\n" + " print(f\"Phrase {repr(p):<40} found in {len(finds[p])}/{len(ds['train'])} samples\\t hash {hashes[p]}\\t {count} occurrences\")\n" ] }, {