Skip to content

Commit

Permalink
fix short seq inclusion
Browse files Browse the repository at this point in the history
  • Loading branch information
markus583 committed May 21, 2024
1 parent f46b995 commit 79598b1
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
14 changes: 7 additions & 7 deletions wtpsplit/evaluation/intrinsic_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ class Args:

for f, name in [
(punkt_sentencize, "punkt"),
-# (spacy_dp_sentencize, "spacy_dp"),
-# (spacy_sent_sentencize, "spacy_sent"),
-# (pysbd_sentencize, "pysbd"),
-# (ersatz_sentencize, "ersatz"),
+(spacy_dp_sentencize, "spacy_dp"),
+(spacy_sent_sentencize, "spacy_sent"),
+(pysbd_sentencize, "pysbd"),
+(ersatz_sentencize, "ersatz"),
]:
print(f"Running {name} on {dataset_name} in {lang_code}...")
indices[lang][dataset_name][name] = {}
Expand All @@ -109,7 +109,7 @@ class Args:
concat_indices = {}
for doc in metrics:
for key, value in doc.items():
-if isinstance(value, (float, int)):
+if not isinstance(value, list):
# numeric
if key not in avg_results:
avg_results[key] = []
Expand Down Expand Up @@ -156,6 +156,6 @@ class Args:
# print(e)
results[lang][dataset_name][name] = None

-json.dump(results, open(Constants.CACHE_DIR / "intrinsic_baselines_punkt.json", "w"), indent=4, default=int)
-json.dump(indices, open(Constants.CACHE_DIR / "intrinsic_baselines_punkt_IDX.json", "w"), indent=4, default=int)
+json.dump(results, open(Constants.CACHE_DIR / "intrinsic_baselines.json", "w"), indent=4, default=int)
+json.dump(indices, open(Constants.CACHE_DIR / "intrinsic_baselines_IDX.json", "w"), indent=4, default=int)
print(Constants.CACHE_DIR / "intrinsic_baselines.json")
2 changes: 1 addition & 1 deletion wtpsplit/evaluation/intrinsic_baselines_multilingual.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class Args:
concat_indices = {}
for doc in metrics:
for key, value in doc.items():
-if isinstance(value, (float, int)):
+if not isinstance(value, list):
# numeric
if key not in avg_results:
avg_results[key] = []
Expand Down

0 comments on commit 79598b1

Please sign in to comment.