Skip to content

Commit

Permalink
Fix evaluation issue when latency unit is spm for iterative dataloader (#98)
Browse files: browse the repository at this point in the history

* spm-fix

* Update simuleval/evaluator/evaluator.py

Co-authored-by: Abinesh Ramakrishnan <[email protected]>

* Update simuleval/evaluator/evaluator.py

Co-authored-by: Abinesh Ramakrishnan <[email protected]>

* Update simuleval/evaluator/instance.py

Co-authored-by: Abinesh Ramakrishnan <[email protected]>

* fix argument

---------

Co-authored-by: Abinesh Ramakrishnan <[email protected]>
  • Loading branch information
xutaima and ibanesh authored Nov 30, 2023
1 parent f49f017 commit a03d03f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion simuleval/evaluator/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def build_instances_from_log(self):
if self.output is not None:
with open(self.output / "instances.log", "r") as f:
for line in f:
instance = LogInstance(line.strip())
instance = LogInstance(line.strip(), self.args.eval_latency_unit)
index = instance.index - self.start_index
self.instances[index] = instance
self.instances[index].set_target_spm_model(self.target_spm_model)
Expand Down
18 changes: 14 additions & 4 deletions simuleval/evaluator/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,21 +456,31 @@ class SpeechToSpeechInstance(SpeechInputInstance, SpeechOutputInstance):


class LogInstance:
    """Evaluation instance rebuilt from one JSON-encoded log line.

    Every key of the decoded JSON object is mirrored as an attribute on the
    instance. The reference length is computed lazily by the
    ``reference_length`` property so that it follows ``latency_unit`` and can
    use a SentencePiece model attached after construction via
    ``set_target_spm_model``.
    """

    def __init__(self, info: str, latency_unit: str = "word") -> None:
        """Parse a log line and initialise the instance.

        Args:
            info: One line of the log, a JSON object that must contain an
                ``"index"`` key; ``"reference"`` and ``"source_length"`` are
                optional.
            latency_unit: Unit used to measure the reference length; one of
                ``"word"``, ``"char"`` or ``"spm"``.

        Raises:
            KeyError: If the JSON object has no ``"index"`` entry.
            json.JSONDecodeError: If ``info`` is not valid JSON.
        """
        self.info = json.loads(info.strip())
        self.intervals = []
        for key, value in self.info.items():
            # Read-only properties (e.g. ``reference_length``) cannot be
            # assigned; skip such keys instead of raising AttributeError.
            if isinstance(getattr(type(self), key, None), property):
                continue
            setattr(self, key, value)

        self.index = self.info["index"]
        self.reference = self.info.get("reference", "")
        self.latency_unit = latency_unit
        self.source_length = self.info.get("source_length")  # just for testing!
        self.finish_prediction = True
        self.metrics = {}
        # Filled in later through set_target_spm_model(); needed only when
        # latency_unit is "spm".
        self.target_spm_model = None

    def set_target_spm_model(self, spm_model):
        """Attach the model used to tokenise the reference for the "spm" unit."""
        self.target_spm_model = spm_model

    @property
    def reference_length(self) -> int:
        """Length of the reference measured in ``latency_unit``.

        Returns:
            The number of space-separated words (``"word"``), the number of
            characters of the stripped reference (``"char"``), or the number
            of pieces returned by the target model's ``encode`` (``"spm"``).

        Raises:
            AssertionError: If the unit is ``"spm"`` and no model was set.
            NotImplementedError: If ``latency_unit`` is not supported.
        """
        if self.latency_unit == "word":
            return len(self.reference.split(" "))
        if self.latency_unit == "char":
            return len(self.reference.strip())
        if self.latency_unit == "spm":
            assert (
                self.target_spm_model is not None
            ), "target_spm_model must be set when latency_unit is 'spm'"
            pieces = self.target_spm_model.encode(self.reference, out_type=str)
            return len(pieces)
        raise NotImplementedError(
            f"Unsupported latency unit: {self.latency_unit!r}"
        )

0 comments on commit a03d03f

Please sign in to comment.