
Commit

fix: lazyloading of model used in metrics to speed up import (#32)
* added init_model to baseline

* added init_model to everything

* fix lint issues

* added init model to qsquare

* ignore type issue

* fix linting

---------

Co-authored-by: Jithin James <[email protected]>
jjmachan and Jithin James authored May 22, 2023
1 parent 6548f3c commit ca07b3d
Showing 6 changed files with 41 additions and 17 deletions.
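The change follows a single pattern across all six files: model construction moves out of __post_init__, which runs the moment a metric dataclass is instantiated (including the module-level metric singletons created at import time), into an explicit init_model() hook that is only called right before evaluation. A minimal sketch of the idea, not the library's exact code; the checkpoint name and the placeholder scoring are illustrative:

from dataclasses import dataclass

from transformers import AutoModelForSequenceClassification


@dataclass
class LazyMetric:
    model_name: str = "some-nli-checkpoint"  # illustrative, not the real default

    # before this commit the body below lived in __post_init__, so merely
    # constructing the metric loaded the weights and slowed down importing the package
    def init_model(self):
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)

    def score(self, ground_truth: list[str], generated_text: list[str]) -> list[float]:
        # callers (or Evaluation.eval) are expected to run init_model() first
        assert hasattr(self, "model"), "call init_model() before score()"
        return [0.0 for _ in ground_truth]  # placeholder scoring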
2 changes: 1 addition & 1 deletion Makefile
@@ -18,7 +18,7 @@ lint: ## Running lint checker: ruff
@ruff check ragas examples tests
type: ## Running type checker: pyright
@echo "(pyright) Typechecking codebase..."
-	@pyright -p ragas
+	@pyright ragas
clean: ## Clean all generated files
@echo "Cleaning all generated files..."
@cd $(GIT_ROOT)/docs && make clean
20 changes: 20 additions & 0 deletions ragas/metrics/base.py
@@ -13,17 +13,33 @@ class Metric(ABC):
    @property
    @abstractmethod
    def name(self: t.Self) -> str:
        """
        The name of the metric.
        """
        ...

    @property
    @abstractmethod
    def is_batchable(self: t.Self) -> bool:
        """
        Whether this metric can be computed in batches.
        """
        ...

    @abstractmethod
    def init_model(self: t.Self):
        """
        Lazily initialize the model(s) this metric depends on.
        """
        ...

    @abstractmethod
    def score(
        self: t.Self, ground_truth: list[str], generated_text: list[str]
    ) -> list[float]:
        """
        Run the metric on the ground_truth and generated_text and return the scores.
        """
        ...


@@ -37,6 +53,10 @@ def eval(self, ground_truth: list[list[str]], generated_text: list[str]) -> Resu
        ds = Dataset.from_dict(
            {"ground_truth": ground_truth, "generated_text": generated_text}
        )

        # initialize all the models in the metrics
        [m.init_model() for m in self.metrics]

        ds = ds.map(
            self._get_score,
            batched=self.batched,
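Together with the abstract hook above, the call added to eval() is what makes the laziness safe: each metric's init_model() runs exactly once, right before the dataset is mapped through _get_score. A hedged usage sketch of the resulting flow, using the ready-made rouge1 and bert_score instances that benchmark.py also imports (the example strings are made up):

from ragas.metrics import bert_score, rouge1

# holding these metric objects is now cheap: they are plain dataclass
# instances carrying configuration, with no model loaded yet
metrics = [rouge1, bert_score]

# mirror what Evaluation.eval() does before mapping _get_score: initialize
# each metric once, so model weights are loaded only at evaluation time
for m in metrics:
    m.init_model()

ground_truth = ["the cat sat on the mat"]
generated_text = ["a cat was sitting on the mat"]
scores = {m.name: m.score(ground_truth, generated_text) for m in metrics}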
9 changes: 5 additions & 4 deletions ragas/metrics/factual.py
@@ -52,7 +52,7 @@ class EntailmentScore(Metric):
    batch_size: int = 4
    device: t.Literal["cpu", "cuda"] | Device = "cpu"

-   def __post_init__(self):
+   def init_model(self):
        self.device = device_check(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
@@ -212,10 +212,11 @@ class Qsquare(Metric):
    include_nouns: bool = True
    save_results: bool = False

-   def __post_init__(self):
+   def init_model(self):
        self.qa = QAGQ.from_pretrained(self.qa_model_name)
        self.qg = QAGQ.from_pretrained(self.qg_model_name)
        self.nli = EntailmentScore()
+       self.nli.init_model()
        try:
            self.nlp = spacy.load(SPACY_MODEL)
        except OSError:
@@ -326,15 +327,15 @@ def score(self, ground_truth: list[str], generated_text: list[str], **kwargs):
            )
            gnd_qans[i] = [
                {"question": qstn, "answer": ans}
-               for qstn, ans in zip(questions, candidates)
+               for qstn, ans in zip(questions, candidates)  # type: ignore
            ]

        for i, gen_text in enumerate(generated_text):
            questions = [item["question"] for item in gnd_qans[i]]
            gen_answers = self.generate_answers(questions, gen_text)
            _ = [
                item.update({"predicted_answer": ans})
-               for item, ans in zip(gnd_qans[i], gen_answers)
+               for item, ans in zip(gnd_qans[i], gen_answers)  # type: ignore
            ]

        # del self.qa
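The Qsquare hunk shows the one wrinkle in the migration: a metric that composes another metric must cascade the call, because the wrapped EntailmentScore no longer loads its model in __post_init__. A minimal sketch of that cascade, with invented class names:

from dataclasses import dataclass, field


@dataclass
class Inner:
    def init_model(self):
        # stand-in for EntailmentScore loading its tokenizer and NLI model
        self.model = object()


@dataclass
class Composite:
    inner: Inner = field(default_factory=Inner)

    def init_model(self):
        # load this metric's own models here, then cascade, mirroring
        # Qsquare.init_model() calling self.nli.init_model()
        self.inner.init_model()


c = Composite()   # cheap: nothing loaded yet
c.init_model()    # now both the composite and its inner metric are ready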
2 changes: 1 addition & 1 deletion ragas/metrics/similarity.py
@@ -18,7 +18,7 @@ class BERTScore(Metric):
    model_path: str = "all-MiniLM-L6-v2"
    batch_size: int = 1000

-   def __post_init__(self):
+   def init_model(self):
        self.model = SentenceTransformer(self.model_path)

    @property
8 changes: 7 additions & 1 deletion ragas/metrics/simple.py
@@ -26,6 +26,9 @@ def name(self):
    def is_batchable(self):
        return True

+   def init_model(self):
+       ...

    def score(self, ground_truth: t.List[str], generated_text: t.List[str]):
        ground_truth_ = [[word_tokenize(text)] for text in ground_truth]
        generated_text_ = [word_tokenize(text) for text in generated_text]
@@ -45,7 +48,7 @@ class ROUGE(Metric):
    type: t.Literal[ROUGE_TYPES]
    use_stemmer: bool = False

-   def __post_init__(self):
+   def init_model(self):
        self.scorer = rouge_scorer.RougeScorer(
            [self.type], use_stemmer=self.use_stemmer
        )
@@ -80,6 +83,9 @@ def name(self) -> str:
    def is_batchable(self):
        return True

+   def init_model(self):
+       ...

    def score(self, ground_truth: t.List[str], generated_text: t.List[str]):
        if self.measure == "distance":
            score = [distance(s1, s2) for s1, s2 in zip(ground_truth, generated_text)]
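The two lightweight metrics in this file gain an empty init_model even though they have nothing to load: init_model is declared with @abstractmethod on the base class, so a subclass without an override cannot be instantiated at all. A quick illustration of why the no-op body is still required:

from abc import ABC, abstractmethod


class Metric(ABC):
    @abstractmethod
    def init_model(self):
        ...


class Missing(Metric):
    pass


class NoOp(Metric):
    def init_model(self):
        ...  # nothing to load for a pure string-based metric


NoOp()       # fine
# Missing() raises TypeError because init_model is still abstract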
17 changes: 7 additions & 10 deletions tests/benchmarks/benchmark.py
@@ -7,26 +7,23 @@

from ragas.metrics import (
    Evaluation,
-   edit_distance,
+   bert_score,
    edit_ratio,
-   q_square,
    rouge1,
-   rouge2,
-   rougeL,
)

DEVICE = "cuda" if is_available() else "cpu"
-BATCHES = [0, 1]
+BATCHES = [0, 1, 30, 60]

METRICS = {
    "Rouge1": rouge1,
-   "Rouge2": rouge2,
-   "RougeL": rougeL,
+   # "Rouge2": rouge2,
+   # "RougeL": rougeL,
    "EditRatio": edit_ratio,
-   "EditDistance": edit_distance,
-   # "SBERTScore": bert_score,
+   # "EditDistance": edit_distance,
+   "SBERTScore": bert_score,
    # "EntailmentScore": entailment_score,
-   "Qsquare": q_square,
+   # "Qsquare": q_square,
}
DS = load_dataset("explodinggradients/eli5-test", split="test_eli5")
assert isinstance(DS, arrow_dataset.Dataset), "Not an arrow_dataset"
