Merge branch 'explodinggradients:main' into ayulockin/cache
ayulockin authored Dec 9, 2024
2 parents b3ebb1f + 57c6cbf commit 929deb8
Showing 14 changed files with 974 additions and 40 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -64,9 +64,10 @@ pip install git+https://github.com/explodinggradients/ragas

### Evaluate your RAG with Ragas metrics

This is 4 main lines:
This is 5 main lines:

```python
from ragas import evaluate
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness
from langchain_openai.chat_models import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
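# --- hedged sketch, not part of this commit ----------------------------------
# The README snippet is cut off in this diff view; the lines below are only an
# illustrative continuation of the imports above. `eval_dataset` and the
# "gpt-4o" model name are placeholders, not taken from the repository.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
results = evaluate(
    dataset=eval_dataset,  # an EvaluationDataset prepared elsewhere
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=evaluator_llm,
)
```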
41 changes: 38 additions & 3 deletions docs/howtos/customizations/customize_models.md
@@ -70,11 +70,12 @@ import google.auth
from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_core.outputs import LLMResult, ChatGeneration

config = {
"project_id": "<your-project-id>",
"chat_model_id": "gemini-1.0-pro-002",
"embedding_model_id": "textembedding-gecko",
"chat_model_id": "gemini-1.5-pro-002",
"embedding_model_id": "text-embedding-005",
}

# authenticate to GCP
@@ -89,7 +90,41 @@ vertextai_embeddings = VertexAIEmbeddings(
credentials=creds, model_name=config["embedding_model_id"]
)

vertextai_llm = LangchainLLMWrapper(vertextai_llm)
# Create a custom is_finished_parser to capture Gemini generation completion signals
def gemini_is_finished_parser(response: LLMResult) -> bool:
is_finished_list = []
for g in response.flatten():
resp = g.generations[0][0]

# Check generation_info first
if resp.generation_info is not None:
finish_reason = resp.generation_info.get("finish_reason")
if finish_reason is not None:
is_finished_list.append(
finish_reason in ["STOP", "MAX_TOKENS"]
)
continue

# Check response_metadata as fallback
if isinstance(resp, ChatGeneration) and resp.message is not None:
metadata = resp.message.response_metadata
if metadata.get("finish_reason"):
is_finished_list.append(
metadata["finish_reason"] in ["STOP", "MAX_TOKENS"]
)
elif metadata.get("stop_reason"):
is_finished_list.append(
metadata["stop_reason"] in ["STOP", "MAX_TOKENS"]
)

# If no finish reason found, default to True
if not is_finished_list:
is_finished_list.append(True)

return all(is_finished_list)


vertextai_llm = LangchainLLMWrapper(vertextai_llm, is_finished_parser=gemini_is_finished_parser)
vertextai_embeddings = LangchainEmbeddingsWrapper(vertextai_embeddings)
```
Yay! Now you are ready to use ragas with Google VertexAI endpoints.
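As a hedged usage sketch (not part of this commit), the two wrapped objects can then be handed to a ragas evaluation run; `eval_dataset` below is a placeholder for a dataset prepared elsewhere:

```python
from ragas import evaluate
from ragas.metrics import Faithfulness

results = evaluate(
    dataset=eval_dataset,             # placeholder EvaluationDataset
    metrics=[Faithfulness()],
    llm=vertextai_llm,                # LangchainLLMWrapper from above
    embeddings=vertextai_embeddings,  # LangchainEmbeddingsWrapper from above
)
```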
7 changes: 5 additions & 2 deletions src/ragas/callbacks.py
@@ -133,12 +133,15 @@ def __str__(self):

def parse_run_traces(
traces: t.Dict[str, ChainRun],
parent_run_id: t.Optional[str] = None,
) -> t.List[t.Dict[str, t.Any]]:

root_traces = [
chain_trace
for chain_trace in traces.values()
if chain_trace.parent_run_id is None
if chain_trace.parent_run_id == parent_run_id
]

if len(root_traces) > 1:
raise ValueError(
"Multiple root traces found! This is a bug on our end, please file an issue and we will fix it ASAP :)"
@@ -159,7 +162,7 @@ def parse_run_traces(
prompt_traces = {}
for i, prompt_uuid in enumerate(metric_trace.children):
prompt_trace = traces[prompt_uuid]
prompt_traces[f"{i}_{prompt_trace.name}"] = {
prompt_traces[f"{prompt_trace.name}"] = {
"input": prompt_trace.inputs.get("data", {}),
"output": prompt_trace.outputs.get("output", {}),
}
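A hedged illustration of what this change means for consumers of `EvaluationResult.traces` (the exact nesting is inferred from this function, not verified elsewhere): only chains whose `parent_run_id` matches the supplied run id are treated as roots, and prompt-level traces are now keyed by the prompt name alone rather than an `{index}_{name}` composite.

```python
# Hypothetical shape of one row in EvaluationResult.traces after this change
# (metric and prompt names are illustrative):
row_trace = {
    "faithfulness": {                      # keyed by metric name
        "statement_generator_prompt": {    # prompt name alone, no "0_" prefix
            "input": {"question": "..."},       # prompt_trace.inputs.get("data", {})
            "output": {"statements": ["..."]},  # prompt_trace.outputs.get("output", {})
        },
    },
}
```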
4 changes: 2 additions & 2 deletions src/ragas/config.py
@@ -5,7 +5,7 @@
from ragas.embeddings import BaseRagasEmbeddings
from ragas.llms import BaseRagasLLM
from ragas.losses import Loss
from ragas.optimizers import Optimizer
from ragas.optimizers import GeneticOptimizer, Optimizer

DEFAULT_OPTIMIZER_CONFIG = {"max_steps": 100}

@@ -20,7 +20,7 @@ class DemonstrationConfig(BaseModel):
class InstructionConfig(BaseModel):
enabled: bool = True
loss: t.Optional[Loss] = None
optimizer: Optimizer
optimizer: Optimizer = GeneticOptimizer()
optimizer_config: t.Dict[str, t.Any] = Field(
default_factory=lambda: DEFAULT_OPTIMIZER_CONFIG
)
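A hedged sketch of what the new default means for callers, assuming the fields truncated from this diff also have defaults:

```python
from ragas.config import InstructionConfig

# With the new default, `optimizer` no longer has to be supplied explicitly;
# a GeneticOptimizer instance is used when the field is omitted.
cfg = InstructionConfig()
print(type(cfg.optimizer).__name__)  # expected: "GeneticOptimizer"
```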
12 changes: 11 additions & 1 deletion src/ragas/dataset_schema.py
@@ -6,6 +6,7 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field
from uuid import UUID

import numpy as np
from datasets import Dataset as HFDataset
@@ -43,6 +44,13 @@ def get_features(self) -> t.List[str]:
"""
return list(self.to_dict().keys())

def to_string(self) -> str:
"""
Get the string representation of the sample.
"""
sample_dict = self.to_dict()
return "".join(f"\n{key}:\n\t{val}\n" for key, val in sample_dict.items())


class SingleTurnSample(BaseSample):
"""
@@ -378,6 +386,7 @@ class EvaluationResult:
cost_cb: t.Optional[CostCallbackHandler] = None
traces: t.List[t.Dict[str, t.Any]] = field(default_factory=list)
ragas_traces: t.Dict[str, ChainRun] = field(default_factory=dict, repr=False)
run_id: t.Optional[UUID] = None

def __post_init__(self):
# transform scores from list of dicts to dict of lists
@@ -395,7 +404,8 @@ def __post_init__(self):
values.append(value + 1e-10)

# parse the traces
self.traces = parse_run_traces(self.ragas_traces)
run_id = str(self.run_id) if self.run_id is not None else None
self.traces = parse_run_traces(self.ragas_traces, run_id)

def __repr__(self) -> str:
score_strs = [f"'{k}': {v:0.4f}" for k, v in self._repr_dict.items()]
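A hedged usage sketch for the new `to_string()` helper; the `SingleTurnSample` field names below are assumed from ragas' standard sample schema, not taken from this diff:

```python
from ragas.dataset_schema import SingleTurnSample

sample = SingleTurnSample(
    user_input="What is the capital of France?",
    response="Paris is the capital of France.",
)

# to_string() joins a "<field>:\n\t<value>" block for every populated field.
print(sample.to_string())
```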
9 changes: 8 additions & 1 deletion src/ragas/evaluation.py
@@ -1,11 +1,13 @@
from __future__ import annotations

import typing as t
from uuid import UUID

from datasets import Dataset
from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager
from langchain_core.embeddings import Embeddings as LangchainEmbeddings
from langchain_core.language_models import BaseLanguageModel as LangchainLLM
from tqdm.auto import tqdm

from ragas._analytics import track_was_completed
from ragas.callbacks import ChainType, RagasTracer, new_group
@@ -59,12 +61,14 @@ def evaluate(
embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
callbacks: Callbacks = None,
in_ci: bool = False,
run_config: RunConfig = RunConfig(),
run_config: t.Optional[RunConfig] = None,
token_usage_parser: t.Optional[TokenUsageParser] = None,
raise_exceptions: bool = False,
column_map: t.Optional[t.Dict[str, str]] = None,
show_progress: bool = True,
batch_size: t.Optional[int] = None,
_run_id: t.Optional[UUID] = None,
_pbar: t.Optional[tqdm] = None,
) -> EvaluationResult:
"""
Run the evaluation on the dataset with different metrics
@@ -146,6 +150,7 @@ def evaluate(
"""
column_map = column_map or {}
callbacks = callbacks or []
run_config = run_config or RunConfig()

if helicone_config.is_enabled:
import uuid
@@ -226,6 +231,7 @@ def evaluate(
run_config=run_config,
show_progress=show_progress,
batch_size=batch_size,
pbar=_pbar,
)

# Ragas Callbacks
@@ -333,6 +339,7 @@ def evaluate(
cost_cb,
),
ragas_traces=tracer.traces,
run_id=_run_id,
)
if not evaluation_group_cm.ended:
evaluation_rm.on_chain_end({"scores": result.scores})
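The `run_config` signature change follows the standard Python pattern for avoiding a shared default instance; a minimal, generic illustration (not ragas code):

```python
from __future__ import annotations


class Config:
    def __init__(self) -> None:
        self.flags: list[str] = []


def risky(cfg: Config = Config()) -> Config:
    # The default Config() is created once, at function definition time, so
    # every call without an argument shares (and mutates) the same object.
    cfg.flags.append("run")
    return cfg


def safe(cfg: Config | None = None) -> Config:
    # A fresh Config is created per call, mirroring `run_config or RunConfig()`.
    cfg = cfg or Config()
    cfg.flags.append("run")
    return cfg


print(len(risky().flags), len(risky().flags))  # 1 2 -> state leaks across calls
print(len(safe().flags), len(safe().flags))    # 1 1
```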
38 changes: 24 additions & 14 deletions src/ragas/executor.py
@@ -84,6 +84,7 @@ class Executor:
batch_size: t.Optional[int] = None
run_config: t.Optional[RunConfig] = field(default=None, repr=False)
_nest_asyncio_applied: bool = field(default=False, repr=False)
pbar: t.Optional[tqdm] = None

def wrap_callable_with_index(
self, callable: t.Callable, counter: int
@@ -130,21 +131,22 @@ async def _process_jobs(self) -> t.List[t.Any]:
results = []

if not self.batch_size:
with tqdm(
total=len(self.jobs),
desc=self.desc,
disable=not self.show_progress,
) as pbar:
# Create coroutines
coroutines = [
afunc(*args, **kwargs) for afunc, args, kwargs, _ in self.jobs
]
for future in await as_completed(coroutines, max_workers):
result = await future
results.append(result)
pbar.update(1)
# Use external progress bar if provided, otherwise create one
if self.pbar is None:
with tqdm(
total=len(self.jobs),
desc=self.desc,
disable=not self.show_progress,
) as internal_pbar:
await self._process_coroutines(
self.jobs, internal_pbar, results, max_workers
)
else:
await self._process_coroutines(
self.jobs, self.pbar, results, max_workers
)

return results
return results

# With batching, show nested progress bars
batches = batched(self.jobs, self.batch_size) # generator of job tuples
@@ -182,6 +184,14 @@ async def _process_jobs(self) -> t.List[t.Any]:

return results

async def _process_coroutines(self, jobs, pbar, results, max_workers):
"""Helper function to process coroutines and update the progress bar."""
coroutines = [afunc(*args, **kwargs) for afunc, args, kwargs, _ in jobs]
for future in await as_completed(coroutines, max_workers):
result = await future
results.append(result)
pbar.update(1)

def results(self) -> t.List[t.Any]:
"""
Execute all submitted jobs and return their results. The results are returned in the order of job submission.
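A hedged sketch of the new hook (the `Executor` constructor arguments and `submit` signature below are assumed, not verified against this commit): a caller such as `evaluate()` can own a single tqdm bar, pass it in via `_pbar`, and have the executor update it instead of creating its own.

```python
import asyncio

from tqdm.auto import tqdm
from ragas.executor import Executor


async def fake_metric(i: int) -> int:
    # stand-in for an async metric call
    await asyncio.sleep(0)
    return i


with tqdm(total=10, desc="evaluating") as shared_pbar:
    executor = Executor(desc="evaluating", show_progress=True, pbar=shared_pbar)
    for i in range(10):
        executor.submit(fake_metric, i)
    results = executor.results()  # updates shared_pbar as jobs complete
```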
20 changes: 12 additions & 8 deletions src/ragas/llms/base.py
@@ -134,7 +134,7 @@ def __init__(
def is_finished(self, response: LLMResult) -> bool:
"""
Parse the response to check if the LLM finished by checking the finish_reason
or stop_reason.
or stop_reason. Supports OpenAI and Vertex AI models.
"""
if self.is_finished_parser is not None:
return self.is_finished_parser(response)
@@ -145,30 +145,34 @@ def is_finished(self, response: LLMResult) -> bool:
resp = g.generations[0][0]
if resp.generation_info is not None:
# generation_info is provided - so we parse that

# OpenAI uses "stop" to indicate that the generation is finished
# and is stored in 'finish_reason' key in generation_info
if resp.generation_info.get("finish_reason") is not None:
finish_reason = resp.generation_info.get("finish_reason")
if finish_reason is not None:
# OpenAI uses "stop"
# Vertex AI uses "STOP" or "MAX_TOKENS"
is_finished_list.append(
resp.generation_info.get("finish_reason") == "stop"
finish_reason in ["stop", "STOP", "MAX_TOKENS"]
)

# provide more conditions here
# https://github.com/explodinggradients/ragas/issues/1548

# if generation_info is empty, we parse the response_metadata
# this is less reliable

elif (
isinstance(resp, ChatGeneration)
and t.cast(ChatGeneration, resp).message is not None
):
resp_message: BaseMessage = t.cast(ChatGeneration, resp).message
if resp_message.response_metadata.get("finish_reason") is not None:
finish_reason = resp_message.response_metadata.get("finish_reason")
is_finished_list.append(
resp_message.response_metadata.get("finish_reason") == "stop"
finish_reason in ["stop", "STOP", "MAX_TOKENS"]
)
elif resp_message.response_metadata.get("stop_reason") is not None:
stop_reason = resp_message.response_metadata.get("stop_reason")
is_finished_list.append(
resp_message.response_metadata.get("stop_reason") == "end_turn"
stop_reason in ["end_turn", "STOP", "MAX_TOKENS"]
)
# default to True
else:
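For providers whose finish reasons are still not covered, the escape hatch shown earlier in this commit applies; a minimal hedged sketch (`chat_model` is a placeholder for any LangChain chat model):

```python
from langchain_core.outputs import LLMResult
from ragas.llms import LangchainLLMWrapper


def always_finished(_: LLMResult) -> bool:
    # Trust the provider; skip finish_reason/stop_reason inspection entirely.
    return True


# `chat_model` is a placeholder, e.g. any LangChain chat model instance.
llm = LangchainLLMWrapper(chat_model, is_finished_parser=always_finished)
```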
14 changes: 14 additions & 0 deletions src/ragas/losses.py
@@ -1,6 +1,9 @@
import typing as t
from abc import ABC, abstractmethod

from pydantic import GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema


class Loss(ABC):
"""
@@ -11,6 +14,17 @@ class Loss(ABC):
def __call__(self, predicted: t.List, actual: t.List) -> float:
raise NotImplementedError

@classmethod
def __get_pydantic_core_schema__(
cls, source_type: t.Any, handler: GetCoreSchemaHandler
) -> CoreSchema:
"""
Define how Pydantic generates a schema for Loss.
"""
return core_schema.no_info_after_validator_function(
cls, core_schema.is_instance_schema(cls) # The validator function
)


class MSELoss(Loss):
"""
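A hedged illustration of why the core-schema hook is added (assuming `MSELoss` is instantiable, as its presence above suggests): it lets a `Loss` instance be used directly as a field on a Pydantic model, which is how `InstructionConfig.loss` is typed in `src/ragas/config.py`.

```python
from pydantic import BaseModel

from ragas.losses import Loss, MSELoss


class MyConfig(BaseModel):
    loss: Loss  # validated via the is_instance_schema(Loss) hook above


cfg = MyConfig(loss=MSELoss())
print(cfg.loss)
```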