diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index b36aa7e80e..6f4062502b 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -42,7 +42,7 @@ jobs:
if: matrix.minimal
run: pip install -r requirements.min.txt
- name: Prepare examples dependencies
- run: pip install catboost sentence-transformers
+ run: pip install catboost sentence-transformers openai
- name: Export examples
run: jupyter nbconvert --to python examples/*/*.ipynb --output-dir example_scripts
- name: Run examples
diff --git a/docs/book/reference/all-metrics.md b/docs/book/reference/all-metrics.md
index 484440cd06..29d5762a5b 100644
--- a/docs/book/reference/all-metrics.md
+++ b/docs/book/reference/all-metrics.md
@@ -274,7 +274,14 @@ Check for regular expression matches.
| **ExcludesWords()**
- Checks if the text excludes all specified words.
- Considers only vocabulary words (from NLTK vocabulary).
- By default, considers inflected and variant forms of the same word.
- Returns True/False for every input.
Example use:
`ExcludesWords(words_list=['buy', 'sell', 'bet'])`| **Required:**
`words_list: List[str]`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `lemmatize = True` or `False`
|
| **ItemMatch()** - Checks whether the text contains **any** (default) or **all** specified items that are specific to each row (represented as tuples)
- Returns True/False for each row.
Example use:
`ItemMatch(with_column="expected")`| **Required:**
`with_column: str`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `case_sensitive = True` or `False`
|
| **ItemNoMatch()** - Checks whether the text excludes **any** (default) or **all** specified items that are specific to each row (represented as tuples)
- Returns True/False for each row.
Example use:
`ItemNoMatch(with_column="forbidden")`| **Required:**
`with_column: str`
**Optional:**- `display_name`
- `mode = 'all'` or `'any'`
- `case_sensitive = True` or `False`
|
+| **WordMatch()** - Checks whether the text includes **any** (default) or **all** specified words for each row (represented as tuples).
- Considers only vocabulary words (from NLTK vocabulary).
- By default, considers inflected and variant forms of the same word.
- Returns True/False for every input.
Example use:
`WordMatch(with_column="expected")` | **Required:**
`with_column: str`
**Optional:**- `display_name`
- `mode = 'any'` or `'all'`
- `lemmatize = True` or `False`
|
+| **WordNoMatch()** - Checks whether the text excludes **any** (default) or **all** specified words for each row (represented as tuples).
- Considers only vocabulary words (from NLTK vocabulary).
- By default, considers inflected and variant forms of the same word.
- Returns True/False for every input.
Example use:
`WordNoMatch(with_column="forbidden")` | **Required:**
`with_column: str`
**Optional:**- `display_name`
- `mode = 'any'` or `'all'`
- `lemmatize = True` or `False`
|
+| **ExactMatch()** - Checks whether the text in two columns matches exactly.
- Returns True/False for every input.
Example use:
`ExactMatch(with_column='column_2')`| **Required:**
`with_column: str`
**Optional:** |
+| **IsValidJSON()** - Checks if the text in a specified column is a valid JSON.
- Returns True/False for every input.
Example use:
`IsValidJSON(column_name='column_1')`| **Required:**
`column_name`
**Optional:** |
| **JSONSchemaMatch()** - Checks if the text contains a JSON object matching the **expected_schema**. Supports exact (**exact_match=True**) or minimal (**exact_match=False**) matching, with optional strict type validation (**validate_types=True**).
- Returns True/False for each row.
Example use:
`JSONSchemaMatch(expected_schema={"name": str, "age": int}, exact_match=False, validate_types=True)`| **Required:**
`expected_schema: Dict[str, type]`
**Optional:**- `exact_match = True` or `False`
- `validate_types = True` or `False`
|
+| **JSONMatch()** - Compares two columns of a dataframe and checks whether the JSON objects in each row match.
- Returns True/False for every input.
Example use:
`JSONMatch(with_column="column_2")`| **Required:**
`with_column : str`
**Optional:**- `display_name`
|
+| **ContainsLink()** - Checks if the text contains at least one valid URL.
- Returns True/False for each row.
Example use:
`ContainsLink(column_name='column_1')`| **Required:**
`column_name: str`
**Optional:** |
+| **IsValidPython()** - Checks if the text is valid Python code without syntax errors.
- Returns True/False for every input.
Example use:
`IsValidPython(column_name='column_1')`| **Required:**
`column_name: str`
**Optional:** |
## Descriptors: Text stats
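For orientation, here is a minimal usage sketch of the new descriptors documented in the table above (data, column names, and the preset import are illustrative assumptions; the Report/TextEvals pattern mirrors the notebook changes later in this diff):

```python
import pandas as pd

from evidently.descriptors import ContainsLink, IsValidJSON, JSONMatch, WordMatch
from evidently.metric_preset import TextEvals
from evidently.report import Report

# Toy data: "response" is the evaluated text, the other columns hold per-row references.
df = pd.DataFrame({
    "response": ['{"answer": 42}', "see https://example.com for details"],
    "expected_words": [("answer",), ("details", "example")],
    "reference_json": ['{"answer": 42}', "{}"],
})

report = Report(metrics=[
    TextEvals(column_name="response", descriptors=[
        IsValidJSON(),                            # is each response valid JSON?
        ContainsLink(),                           # does each response contain a parseable URL?
        WordMatch(with_column="expected_words"),  # word check against the paired column (mode="any" by default)
        JSONMatch(with_column="reference_json"),  # JSON equality against the paired column
    ])
])
report.run(reference_data=None, current_data=df)
```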
diff --git a/example_test.py b/example_test.py
index 9132656429..ae4c71ffe4 100644
--- a/example_test.py
+++ b/example_test.py
@@ -12,8 +12,8 @@
"comparing_custom_statest_with_classic_distributions.py",
"how_to_evaluate_llm_with_text_descriptors.py",
"how_to_run_drift_report_for_text_data.py", # too slow & torch version conflict?
- "llm_evaluation_tutorial.ipynb", # cloud usage
- "llm_tracing_tutorial.ipynb", # cloud usage
+ "llm_evaluation_tutorial.py", # cloud usage
+ "llm_tracing_tutorial.py", # cloud usage
]
diff --git a/examples/how_to_questions/how_to_make_custom_metric_and_test.ipynb b/examples/how_to_questions/how_to_make_custom_metric_and_test.ipynb
index cb15262531..1822d3dec2 100644
--- a/examples/how_to_questions/how_to_make_custom_metric_and_test.ipynb
+++ b/examples/how_to_questions/how_to_make_custom_metric_and_test.ipynb
@@ -98,6 +98,8 @@
"outputs": [],
"source": [
"class MyMetricResult(MetricResult):\n",
+ " class Config:\n",
+ " type_alias = \"evidently:metric_result:MyMetricResult\"\n",
" sum_value: float"
]
},
@@ -119,6 +121,8 @@
"outputs": [],
"source": [
"class MyMetric(Metric[MyMetricResult]):\n",
+ " class Config:\n",
+ " type_alias = \"evidently:metric:MyMetric\"\n",
" column_name: str\n",
"\n",
" def __init__(self, column_name: str):\n",
@@ -235,6 +239,8 @@
"\n",
"\n",
"class MyMetricResult(MetricResult):\n",
+ " class Config:\n",
+ " type_alias = \"evidently:metric_result:MyMetricResult\"\n",
" feature_name: str\n",
" current_sum_value: float\n",
" x_values_for_hist: list\n",
@@ -243,6 +249,8 @@
"\n",
"\n",
"class MyMetric(Metric[MyMetricResult]):\n",
+ " class Config:\n",
+ " type_alias = \"evidently:metric:MyMetric\"\n",
" column_name: str\n",
"\n",
" def __init__(self, column_name: str) -> None:\n",
diff --git a/examples/how_to_questions/how_to_run_calculations_over_text_data.ipynb b/examples/how_to_questions/how_to_run_calculations_over_text_data.ipynb
index 5a0bcefae9..7e0ae5c011 100644
--- a/examples/how_to_questions/how_to_run_calculations_over_text_data.ipynb
+++ b/examples/how_to_questions/how_to_run_calculations_over_text_data.ipynb
@@ -44,7 +44,6 @@
"from evidently.metric_preset import RegressionPreset\n",
"from evidently.metric_preset import ClassificationPreset\n",
"from evidently.metric_preset import TargetDriftPreset\n",
- "from evidently.metric_preset import TextOverviewPreset\n",
"\n",
"from evidently.metrics import *\n",
"\n",
@@ -360,52 +359,6 @@
"classification_report.json()"
]
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "IAe0g1SWlV4L"
- },
- "source": [
- "# Text Overview Preset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- },
- "id": "9H8xHPN-tYY8",
- "outputId": "644abbf8-d717-484f-8125-902b99288c59"
- },
- "outputs": [],
- "source": [
- "text_overview_report = Report(metrics=[\n",
- " TextOverviewPreset(column_name=\"Review_Text\")\n",
- "])\n",
- "\n",
- "text_overview_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
- "text_overview_report"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 104
- },
- "id": "msjgy3j-f-5i",
- "outputId": "0e86becd-b75d-42f9-d115-72de002f8786"
- },
- "outputs": [],
- "source": [
- "text_overview_report.json()"
- ]
- },
{
"cell_type": "markdown",
"metadata": {
diff --git a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
index d6f57a6ea5..3623818fa7 100644
--- a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
+++ b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
@@ -191,9 +191,7 @@
"id": "204d90a4-694e-406b-949a-f7ba3b601eac",
"metadata": {},
"outputs": [],
- "source": [
- "print(ToxicityLLMEval().get_template().get_prompt_template())"
- ]
+ "source": "print(ToxicityLLMEval().get_template().get_template())"
},
{
"cell_type": "code",
@@ -308,7 +306,7 @@
"source": [
"#that's how you can see the prompt\n",
"\n",
- "print(ContextQualityLLMEval(question=\"question\").get_template().get_prompt_template())"
+ "print(ContextQualityLLMEval(question=\"question\").get_template().get_template())"
]
},
{
@@ -414,12 +412,13 @@
" pre_messages=[(\"system\", \"You are a judge which evaluates text.\")],\n",
" ),\n",
" provider = \"openai\",\n",
- " model = \"gpt-4o-mini\"\n",
+ " model = \"gpt-4o-mini\",\n",
+ " display_name=\"test\"\n",
")\n",
"\n",
"report = Report(metrics=[\n",
" TextEvals(column_name=\"response\", descriptors=[\n",
- " custom_judge(display_name=\"test\")\n",
+ " custom_judge\n",
" ])\n",
"])\n",
"\n",
diff --git a/requirements.dev.txt b/requirements.dev.txt
index 606cca9714..a4f38fdd44 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -3,7 +3,8 @@ wheel==0.38.1
setuptools==65.5.1; python_version < '3.12'
setuptools==68.2.2; python_version >= '3.12'
jupyter==1.0.0
-mypy==0.981
+mypy==1.1.1
+pandas-stubs
pytest==7.4.4
pytest-asyncio==0.23.7
types-PyYAML==6.0.1
diff --git a/setup.cfg b/setup.cfg
index 231d1f6f6c..c0e8eef2c4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -26,6 +26,8 @@ files = src/evidently
python_version = 3.8
disable_error_code = misc
namespace_packages = true
+no_implicit_optional = False
+plugins = pydantic.mypy,numpy.typing.mypy_plugin
[mypy-nltk.*]
ignore_missing_imports = True
@@ -46,12 +48,6 @@ ignore_missing_imports = True
[mypy-sentence_transformers.*]
ignore_missing_imports = True
-[mypy-pandas.*]
-ignore_missing_imports = True
-
-[mypy-numpy.*]
-ignore_missing_imports = True
-
[mypy-scipy.*]
ignore_missing_imports = True
diff --git a/setup.py b/setup.py
index 329d5869fe..63e81651a5 100644
--- a/setup.py
+++ b/setup.py
@@ -85,7 +85,8 @@
"setuptools==65.5.1; python_version < '3.12'",
"setuptools==68.2.2; python_version >= '3.12'",
"jupyter==1.0.0",
- "mypy==0.981",
+ "mypy==1.1.1",
+ "pandas-stubs>=1.3.5",
"pytest==7.4.4",
"types-PyYAML==6.0.1",
"types-requests==2.26.0",
diff --git a/src/evidently/_pydantic_compat.py b/src/evidently/_pydantic_compat.py
index 5b0cf2ba0d..3cef08bff5 100644
--- a/src/evidently/_pydantic_compat.py
+++ b/src/evidently/_pydantic_compat.py
@@ -30,13 +30,13 @@
from pydantic.v1.typing import DictStrAny
else:
- from pydantic import BaseConfig
- from pydantic import BaseModel
- from pydantic import Extra
- from pydantic import Field
+ from pydantic import BaseConfig # type: ignore[assignment]
+ from pydantic import BaseModel # type: ignore[assignment]
+ from pydantic import Extra # type: ignore[assignment]
+ from pydantic import Field # type: ignore[assignment]
from pydantic import PrivateAttr
- from pydantic import SecretStr
- from pydantic import ValidationError
+ from pydantic import SecretStr # type: ignore[assignment]
+ from pydantic import ValidationError # type: ignore[assignment]
from pydantic import parse_obj_as
from pydantic import validator
from pydantic.fields import SHAPE_DICT # type: ignore[attr-defined,no-redef]
diff --git a/src/evidently/base_metric.py b/src/evidently/base_metric.py
index 29d559078b..631446be84 100644
--- a/src/evidently/base_metric.py
+++ b/src/evidently/base_metric.py
@@ -18,6 +18,7 @@
import pandas as pd
import typing_inspect
+from evidently._pydantic_compat import Field
from evidently._pydantic_compat import ModelMetaclass
from evidently._pydantic_compat import PrivateAttr
from evidently.core import BaseResult
@@ -142,12 +143,14 @@ def get_datasets(self) -> Tuple[Optional[TEngineDataType], TEngineDataType]:
class InputData(GenericInputData[pd.DataFrame]):
@staticmethod
- def _get_by_column_name(dataset: pd.DataFrame, additional: pd.DataFrame, column: ColumnName) -> pd.Series:
+ def _get_by_column_name(dataset: pd.DataFrame, additional: Optional[pd.DataFrame], column: ColumnName) -> pd.Series:
if column.dataset == DatasetType.MAIN:
if column.name not in dataset.columns:
raise ColumnNotFound(column.name)
return dataset[column.name]
if column.dataset == DatasetType.ADDITIONAL:
+ if additional is None:
+ raise ValueError("no additional dataset is provided, but field requested")
return additional[column.name]
raise ValueError("unknown column data")
@@ -220,7 +223,9 @@ def __get__(self, instance: Optional["Metric"], type: Type["Metric"]) -> FieldPa
class WithResultFieldPathMetaclass(FrozenBaseMeta):
def result_type(cls) -> Type[MetricResult]:
- return typing_inspect.get_args(next(b for b in cls.__orig_bases__ if typing_inspect.is_generic_type(b)))[0]
+ return typing_inspect.get_args(
+ next(b for b in cls.__orig_bases__ if typing_inspect.is_generic_type(b)) # type: ignore[attr-defined]
+ )[0]
class BasePreset(EvidentlyBaseModel):
@@ -236,7 +241,7 @@ class Config:
_context: Optional["Context"] = None
- options: Options
+ options: Optional[Options] = Field(default=None)
fields: ClassVar[FieldsDescriptor] = FieldsDescriptor()
# resulting options will be determined via
diff --git a/src/evidently/calculation_engine/engine.py b/src/evidently/calculation_engine/engine.py
index c46e123829..4f78aca315 100644
--- a/src/evidently/calculation_engine/engine.py
+++ b/src/evidently/calculation_engine/engine.py
@@ -117,8 +117,6 @@ def get_additional_features(self, data_definition: DataDefinition) -> List[Gener
continue
for feature in required_features:
fp = feature.get_fingerprint()
- if fp in feature:
- continue
features[fp] = feature
return list(features.values())
diff --git a/src/evidently/calculation_engine/python_engine.py b/src/evidently/calculation_engine/python_engine.py
index 4cfed0e72e..3c17695865 100644
--- a/src/evidently/calculation_engine/python_engine.py
+++ b/src/evidently/calculation_engine/python_engine.py
@@ -71,8 +71,8 @@ def calculate_additional_features(
def merge_additional_features(
self, features: Dict[GeneratedFeatures, FeatureResult[pd.DataFrame]]
) -> EngineDatasets[pd.DataFrame]:
- currents: List[pd.DataFrame] = []
- references: List[pd.DataFrame] = []
+ currents = []
+ references = []
for feature, result in features.items():
currents.append(result.current)
@@ -84,15 +84,13 @@ def merge_additional_features(
elif len(currents) == 1:
current = currents[0]
else:
- cur, *currents = currents
- current = cur.join(currents)
+ current = currents[0].join(currents[1:]) # type: ignore[arg-type]
if len(references) == 0:
return EngineDatasets(current=current, reference=None)
if len(references) == 1:
return EngineDatasets(current=current, reference=references[0])
- ref, *references = references
- return EngineDatasets(current=current, reference=ref.join(references))
+ return EngineDatasets(current=current, reference=references[0].join(references[1:])) # type: ignore[arg-type]
def get_metric_implementation(self, metric):
impl = super().get_metric_implementation(metric)
diff --git a/src/evidently/calculations/classification_performance.py b/src/evidently/calculations/classification_performance.py
index 66bf140c54..92465d15fe 100644
--- a/src/evidently/calculations/classification_performance.py
+++ b/src/evidently/calculations/classification_performance.py
@@ -225,7 +225,7 @@ def get_prediction_data(
return PredictionData(
predictions=data[prediction],
prediction_probas=None,
- labels=data[prediction].unique().tolist(),
+ labels=data[prediction].unique().tolist(), # type: ignore[operator]
)
@@ -376,7 +376,7 @@ def calculate_metrics(
f1 = metrics.f1_score(target, prediction.predictions, average="macro")
if prediction.prediction_probas is not None:
binaraized_target = (
- target.astype(str).values.reshape(-1, 1) == list(prediction.prediction_probas.columns.astype(str))
+ target.astype(str).to_numpy().reshape(-1, 1) == list(prediction.prediction_probas.columns.astype(str))
).astype(int)
prediction_probas_array = prediction.prediction_probas.to_numpy()
roc_auc = metrics.roc_auc_score(binaraized_target, prediction_probas_array, average="macro")
diff --git a/src/evidently/calculations/data_drift.py b/src/evidently/calculations/data_drift.py
index 21384d3969..a53dc62849 100644
--- a/src/evidently/calculations/data_drift.py
+++ b/src/evidently/calculations/data_drift.py
@@ -210,7 +210,7 @@ def get_one_column_drift(
current_scatter["Timestamp"] = current_data[datetime_column_name]
x_name = "Timestamp"
else:
- current_scatter["Index"] = current_data.index
+ current_scatter["Index"] = current_data.index.to_series()
x_name = "Index"
else:
current_scatter = {}
@@ -225,7 +225,8 @@ def get_one_column_drift(
column_name,
datetime_column_name,
)
- current_scatter["current (mean)"] = df
+ # TODO: assignment DataFrame to Series
+ current_scatter["current (mean)"] = df # type: ignore[assignment]
if prefix is None:
x_name = "Index binned"
else:
@@ -295,8 +296,13 @@ def get_one_column_drift(
if len(new_values) > 0:
raise ValueError(f"Values {new_values} not presented in 'target_names'")
else:
- current_column = current_column.map(dataset_columns.target_names)
- reference_column = reference_column.map(dataset_columns.target_names)
+ target_names_mapping = (
+ dataset_columns.target_names
+ if isinstance(dataset_columns.target_names, dict)
+ else {idx: value for (idx, value) in enumerate(dataset_columns.target_names)}
+ )
+ current_column = current_column.map(target_names_mapping)
+ reference_column = reference_column.map(target_names_mapping)
current_distribution, reference_distribution = get_distribution_for_column(
column_type=column_type.value,
current=current_column,
diff --git a/src/evidently/calculations/data_quality.py b/src/evidently/calculations/data_quality.py
index 3575ab7489..a3620f9d40 100644
--- a/src/evidently/calculations/data_quality.py
+++ b/src/evidently/calculations/data_quality.py
@@ -4,6 +4,7 @@
from typing import Callable
from typing import Dict
from typing import List
+from typing import Literal
from typing import Optional
from typing import Tuple
from typing import Union
@@ -187,7 +188,8 @@ def get_percentage_from_all_values(value: Union[int, float]) -> float:
if feature_type == ColumnType.Numerical:
# round most common feature value for numeric features to 1e-5
- if not np.issubdtype(feature, np.number):
+ # TODO: Fix this check
+ if not np.issubdtype(feature, np.number): # type: ignore[arg-type]
feature = feature.astype(float)
if isinstance(result.most_common_value, float):
result.most_common_value = np.round(result.most_common_value, 5)
@@ -197,11 +199,12 @@ def get_percentage_from_all_values(value: Union[int, float]) -> float:
result.min = np.round(feature.min(), 2)
common_stats = dict(feature.describe())
std = common_stats["std"]
- result.std = np.round(std, 2)
- result.mean = np.round(common_stats["mean"], 2)
- result.percentile_25 = np.round(common_stats["25%"], 2)
- result.percentile_50 = np.round(common_stats["50%"], 2)
- result.percentile_75 = np.round(common_stats["75%"], 2)
+ # TODO: Fix assignment types
+ result.std = np.round(std, 2) # type: ignore[assignment]
+ result.mean = np.round(common_stats["mean"], 2) # type: ignore[assignment]
+ result.percentile_25 = np.round(common_stats["25%"], 2) # type: ignore[assignment]
+ result.percentile_50 = np.round(common_stats["50%"], 2) # type: ignore[assignment]
+ result.percentile_75 = np.round(common_stats["75%"], 2) # type: ignore[assignment]
if feature_type == ColumnType.Datetime:
# cast datetime value to str for datetime features
@@ -449,15 +452,17 @@ def calculate_category_correlation(
def calculate_numerical_correlation(
column_display_name: str,
- column: pd.Series,
+ column: Optional[pd.Series],
features: pd.DataFrame,
) -> List[ColumnCorrelations]:
- if column.empty or features.empty:
+ if column is None or column.empty or features.empty:
return []
result = []
- for kind in ["pearson", "spearman", "kendall"]:
+ kind: Literal["pearson", "spearman", "kendall"]
+ kinds: List[Literal["pearson", "spearman", "kendall"]] = ["pearson", "spearman", "kendall"]
+ for kind in kinds:
correlations_columns = []
correlations_values = []
@@ -465,7 +470,8 @@ def calculate_numerical_correlation(
correlations_columns.append(other_column_name)
correlations_values.append(
column.replace([np.inf, -np.inf], np.nan).corr(
- features[other_column_name].replace([np.inf, -np.inf], np.nan), method=kind
+ features[other_column_name].replace([np.inf, -np.inf], np.nan),
+ method=kind,
)
)
@@ -473,7 +479,7 @@ def calculate_numerical_correlation(
ColumnCorrelations(
column_name=column_display_name,
kind=kind,
- values=Distribution(x=correlations_columns, y=correlations_values),
+ values=DistributionIncluded(x=correlations_columns, y=correlations_values),
)
)
diff --git a/src/evidently/calculations/recommender_systems.py b/src/evidently/calculations/recommender_systems.py
index c0666fcb06..4b5fdb7bf4 100644
--- a/src/evidently/calculations/recommender_systems.py
+++ b/src/evidently/calculations/recommender_systems.py
@@ -16,7 +16,7 @@ def collect_dataset(
bin_data: bool,
):
df = pd.concat([users, target, preds], axis=1)
- df.columns = ["users", "target", "preds"]
+ df.columns = pd.Index(["users", "target", "preds"])
if min_rel_score:
df["target"] = (df["target"] >= min_rel_score).astype(int)
if recommendations_type == RecomType.SCORE:
diff --git a/src/evidently/calculations/regression_performance.py b/src/evidently/calculations/regression_performance.py
index 7c77217387..40f77868f7 100644
--- a/src/evidently/calculations/regression_performance.py
+++ b/src/evidently/calculations/regression_performance.py
@@ -146,7 +146,7 @@ def _error_num_feature_bias(dataset, feature_name, err_quantiles: ErrorWithQuant
def _stable_value_counts(series: pd.Series):
- return series.value_counts().reindex(pd.unique(series))
+ return series.value_counts().reindex(pd.unique(series.to_numpy()))
def _error_cat_feature_bias(dataset, feature_name, err_quantiles: ErrorWithQuantiles) -> FeatureBias:
diff --git a/src/evidently/calculations/stattests/cramer_von_mises_stattest.py b/src/evidently/calculations/stattests/cramer_von_mises_stattest.py
index 63015adb52..d749675e94 100644
--- a/src/evidently/calculations/stattests/cramer_von_mises_stattest.py
+++ b/src/evidently/calculations/stattests/cramer_von_mises_stattest.py
@@ -208,7 +208,7 @@ def _cramer_von_mises(
p_value: p-value
test_result: whether the drift is detected
"""
- res = _cvm_2samp(reference_data.values, current_data.values)
+ res = _cvm_2samp(reference_data.to_numpy(), current_data.to_numpy())
return res.pvalue, res.pvalue <= threshold
diff --git a/src/evidently/calculations/stattests/fisher_exact_stattest.py b/src/evidently/calculations/stattests/fisher_exact_stattest.py
index 81ccf4a992..56f0af51aa 100644
--- a/src/evidently/calculations/stattests/fisher_exact_stattest.py
+++ b/src/evidently/calculations/stattests/fisher_exact_stattest.py
@@ -54,8 +54,8 @@ def _fisher_exact_stattest(
"""
if (
- (reference_data.isnull().values.any())
- or (current_data.isnull().values.any())
+ (reference_data.isnull().to_numpy().any())
+ or (current_data.isnull().to_numpy().any())
or (reference_data.isin([np.inf, -np.inf]).any())
or (current_data.isin([np.inf, -np.inf]).any())
):
diff --git a/src/evidently/calculations/stattests/hellinger_distance.py b/src/evidently/calculations/stattests/hellinger_distance.py
index 57e110fef1..64ea09c580 100644
--- a/src/evidently/calculations/stattests/hellinger_distance.py
+++ b/src/evidently/calculations/stattests/hellinger_distance.py
@@ -59,8 +59,8 @@ def _hellinger_distance(
if feature_type == ColumnType.Categorical:
dd: DefaultDict[int, int] = defaultdict(int)
- ref = (reference_data.value_counts() / len(reference_data)).to_dict(dd)
- curr = (current_data.value_counts() / len(current_data)).to_dict(dd)
+ ref = (reference_data.value_counts() / len(reference_data)).to_dict(into=dd)
+ curr = (current_data.value_counts() / len(current_data)).to_dict(into=dd)
hellinger_distance = 0.0
for key in keys:
@@ -73,8 +73,8 @@ def _hellinger_distance(
else:
bins = np.histogram_bin_edges(keys, bins="sturges")
- h1 = np.histogram(reference_data.values, bins=bins, density=True)[0]
- h2 = np.histogram(current_data.values, bins=bins, density=True)[0]
+ h1 = np.histogram(reference_data.to_numpy(), bins=bins, density=True)[0]
+ h2 = np.histogram(current_data.to_numpy(), bins=bins, density=True)[0]
bin_width = (max(bins) - min(bins)) / (len(bins) - 1)
diff --git a/src/evidently/calculations/stattests/mmd_stattest.py b/src/evidently/calculations/stattests/mmd_stattest.py
index da5e463883..99f23ab900 100644
--- a/src/evidently/calculations/stattests/mmd_stattest.py
+++ b/src/evidently/calculations/stattests/mmd_stattest.py
@@ -136,10 +136,10 @@ def _mmd_stattest(
p_value: p-value
test_result: whether the drift is detected
"""
- reference_data = reference_data.values.reshape(-1, 1)
- current_data = current_data.values.reshape(-1, 1)
+ transformed_ref = reference_data.to_numpy().reshape(-1, 1)
+ transformed_curr = current_data.to_numpy().reshape(-1, 1)
- p_value, mmd = mmd_pval(reference_data, current_data)
+ p_value, mmd = mmd_pval(transformed_ref, transformed_curr)
return p_value, p_value < threshold
diff --git a/src/evidently/calculations/stattests/utils.py b/src/evidently/calculations/stattests/utils.py
index 39a1bce86d..ebd6f8d1f6 100644
--- a/src/evidently/calculations/stattests/utils.py
+++ b/src/evidently/calculations/stattests/utils.py
@@ -100,17 +100,16 @@ def generate_fisher2x2_contingency_table(reference_data: pd.Series, current_data
if len(unique_categories) != 2:
unique_categories.add("placeholder")
- unique_categories = list(unique_categories) # type: ignore
- unique_categories = dict(zip(unique_categories, [0, 1])) # type: ignore
+ unique_categories_mapping = dict(zip(list(unique_categories), [0, 1]))
- reference_data = reference_data.map(unique_categories).values
- current_data = current_data.map(unique_categories).values
+ ref_data = reference_data.map(unique_categories_mapping).to_numpy()
+ curr_data = current_data.map(unique_categories_mapping).to_numpy()
- zero_ref = reference_data.size - np.count_nonzero(reference_data)
- one_ref = np.count_nonzero(reference_data)
+ zero_ref = ref_data.size - np.count_nonzero(ref_data)
+ one_ref = np.count_nonzero(ref_data)
- zero_cur = current_data.size - np.count_nonzero(current_data)
- one_cur = np.count_nonzero(current_data)
+ zero_cur = curr_data.size - np.count_nonzero(curr_data)
+ one_cur = np.count_nonzero(curr_data)
contingency_table = np.array([[one_cur, zero_cur], [one_ref, zero_ref]])
diff --git a/src/evidently/calculations/stattests/z_stattest.py b/src/evidently/calculations/stattests/z_stattest.py
index cdfe050a89..9ed4d1c421 100644
--- a/src/evidently/calculations/stattests/z_stattest.py
+++ b/src/evidently/calculations/stattests/z_stattest.py
@@ -35,7 +35,7 @@
from evidently.core import ColumnType
-def proportions_diff_z_stat_ind(ref: pd.DataFrame, curr: pd.DataFrame):
+def proportions_diff_z_stat_ind(ref: pd.Series, curr: pd.Series):
# pylint: disable=invalid-name
n1 = len(ref)
n2 = len(curr)
diff --git a/src/evidently/calculations/utils.py b/src/evidently/calculations/utils.py
index ed7c22d328..2ff745a34c 100644
--- a/src/evidently/calculations/utils.py
+++ b/src/evidently/calculations/utils.py
@@ -42,24 +42,38 @@ def make_hist_for_num_plot(curr: pd.Series, ref: pd.Series = None):
return result
-def make_hist_for_cat_plot(curr: pd.Series, ref: pd.Series = None, normalize: bool = False, dropna=False):
+def make_hist_for_cat_plot(curr: pd.Series, ref: pd.Series = None, normalize: bool = False, dropna: bool = False):
result = {}
- hist_df = curr.astype(str).value_counts(normalize=normalize, dropna=dropna).reset_index()
- hist_df.columns = ["x", "count"]
+ hist_df = (
+ curr.astype(str)
+ .value_counts( # type: ignore[call-overload]
+ normalize=normalize,
+ dropna=dropna,
+ )
+ .reset_index()
+ )
+ hist_df.columns = pd.Index(["x", "count"])
result["current"] = hist_df
if ref is not None:
- hist_df = ref.astype(str).value_counts(normalize=normalize, dropna=dropna).reset_index()
- hist_df.columns = ["x", "count"]
+ hist_df = (
+ ref.astype(str)
+ .value_counts( # type: ignore[call-overload]
+ normalize=normalize,
+ dropna=dropna,
+ )
+ .reset_index()
+ )
+ hist_df.columns = pd.Index(["x", "count"])
result["reference"] = hist_df
return result
def get_count_values(col1: pd.Series, col2: pd.Series, col1_name: str, col2_name: str):
df = pd.DataFrame({col2_name: col2, col1_name: col1})
- df = df.groupby([col2_name, col1_name], observed=False).size()
- df.name = "count_objects"
- df = df.reset_index()
- return df[df["count_objects"] > 0]
+ grouped = df.groupby([col2_name, col1_name], observed=False).size()
+ grouped.name = "count_objects"
+ grouped = grouped.reset_index()
+ return grouped[grouped["count_objects"] > 0]
def get_data_for_cat_cat_plot(
@@ -124,8 +138,8 @@ def prepare_box_data(
res = {}
for df, name in zip(dfs, names):
data = df.groupby(cat_feature_name, observed=False)[num_feature_name]
- df_for_plot = data.quantile([0, 0.25, 0.5, 0.75, 1]).reset_index()
- df_for_plot.columns = [cat_feature_name, "q", num_feature_name]
+ df_for_plot = data.quantile(np.array([0, 0.25, 0.5, 0.75, 1])).reset_index()
+ df_for_plot.columns = pd.Index([cat_feature_name, "q", num_feature_name])
res_df = {}
values = df_for_plot[cat_feature_name].unique()
@@ -231,11 +245,13 @@ def transform_df_to_time_count_view(
column_data: pd.Series,
):
df = pd.DataFrame({"period": period_data, datetime_column_name: datetime_data, data_column_name: column_data})
- df = df.groupby(["period", data_column_name]).size()
- df.name = "num"
- df = df.reset_index()
- df[datetime_column_name] = df["period"].dt.to_timestamp()
- return df[df["num"] > 0]
+ grouped = df.groupby(["period", data_column_name]).size()
+ if not isinstance(grouped, pd.Series):
+ raise ValueError("grouped has incorrect type")
+ grouped.name = "num"
+ grouped = grouped.reset_index()
+ grouped[datetime_column_name] = grouped["period"].dt.to_timestamp()
+ return grouped[grouped["num"] > 0]
def prepare_data_for_date_cat(date_curr, date_ref, datetime_name, cat_name, cat_curr, cat_ref):
diff --git a/src/evidently/descriptors/__init__.py b/src/evidently/descriptors/__init__.py
index 173a1be486..174c3a73f1 100644
--- a/src/evidently/descriptors/__init__.py
+++ b/src/evidently/descriptors/__init__.py
@@ -1,8 +1,13 @@
from . import _registry
+from .contains_link_descriptor import ContainsLink
from .custom_descriptor import CustomColumnEval
from .custom_descriptor import CustomPairColumnEval
+from .exact_match_descriptor import ExactMatch
from .hf_descriptor import HuggingFaceModel
from .hf_descriptor import HuggingFaceToxicityModel
+from .is_valid_json_descriptor import IsValidJSON
+from .is_valid_python_descriptor import IsValidPython
+from .json_match_descriptor import JSONMatch
from .json_schema_match_descriptor import JSONSchemaMatch
from .llm_judges import BiasLLMEval
from .llm_judges import ContextQualityLLMEval
@@ -29,6 +34,8 @@
from .word_count_descriptor import WordCount
from .words_descriptor import ExcludesWords
from .words_descriptor import IncludesWords
+from .words_descriptor import WordMatch
+from .words_descriptor import WordNoMatch
__all__ = [
"CustomColumnEval",
@@ -59,7 +66,14 @@
"SemanticSimilarity",
"SentenceCount",
"Sentiment",
+ "ExactMatch",
"RegExp",
+ "ContainsLink",
+ "WordMatch",
+ "WordNoMatch",
+ "IsValidJSON",
"JSONSchemaMatch",
+ "IsValidPython",
"_registry",
+ "JSONMatch",
]
diff --git a/src/evidently/descriptors/_registry.py b/src/evidently/descriptors/_registry.py
index 5ac97efc42..cf385db060 100644
--- a/src/evidently/descriptors/_registry.py
+++ b/src/evidently/descriptors/_registry.py
@@ -15,6 +15,11 @@
"evidently.descriptors.hf_descriptor.HuggingFaceToxicityModel",
"evidently:descriptor:HuggingFaceToxicityModel",
)
+register_type_alias(
+ FeatureDescriptor,
+ "evidently.descriptors.is_valid_python_descriptor.IsValidPython",
+ "evidently:descriptor:IsValidPython",
+)
register_type_alias(
FeatureDescriptor,
"evidently.descriptors.json_schema_match_descriptor.JSONSchemaMatch",
@@ -106,8 +111,30 @@
register_type_alias(
FeatureDescriptor, "evidently.descriptors.words_descriptor.IncludesWords", "evidently:descriptor:IncludesWords"
)
+register_type_alias(
+ FeatureDescriptor, "evidently.descriptors.words_descriptor.WordMatch", "evidently:descriptor:WordMatch"
+)
+register_type_alias(
+ FeatureDescriptor, "evidently.descriptors.words_descriptor.WordNoMatch", "evidently:descriptor:WordNoMatch"
+)
register_type_alias(
GeneralDescriptor,
"evidently.descriptors.custom_descriptor.CustomPairColumnEval",
"evidently:descriptor:CustomPairColumnEval",
)
+register_type_alias(
+ FeatureDescriptor,
+ "evidently.descriptors.json_match_descriptor.JSONMatch",
+ "evidently:descriptor:JSONMatch",
+)
+register_type_alias(
+ FeatureDescriptor,
+ "evidently.descriptors.contains_link_descriptor.ContainsLink",
+ "evidently:descriptor:ContainsLink",
+)
+register_type_alias(
+ FeatureDescriptor, "evidently.descriptors.exact_match_descriptor.ExactMatch", "evidently:descriptor:ExactMatch"
+)
+register_type_alias(
+ FeatureDescriptor, "evidently.descriptors.is_valid_json_descriptor.IsValidJSON", "evidently:descriptor:IsValidJSON"
+)
diff --git a/src/evidently/descriptors/contains_link_descriptor.py b/src/evidently/descriptors/contains_link_descriptor.py
new file mode 100644
index 0000000000..dfc78f7e6a
--- /dev/null
+++ b/src/evidently/descriptors/contains_link_descriptor.py
@@ -0,0 +1,11 @@
+from evidently.features import contains_link_feature
+from evidently.features.generated_features import FeatureDescriptor
+from evidently.features.generated_features import GeneratedFeature
+
+
+class ContainsLink(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:ContainsLink"
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return contains_link_feature.ContainsLink(column_name, self.display_name)
diff --git a/src/evidently/descriptors/exact_match_descriptor.py b/src/evidently/descriptors/exact_match_descriptor.py
new file mode 100644
index 0000000000..7c03cf948f
--- /dev/null
+++ b/src/evidently/descriptors/exact_match_descriptor.py
@@ -0,0 +1,13 @@
+from evidently.features.exact_match_feature import ExactMatchFeature
+from evidently.features.generated_features import FeatureDescriptor
+from evidently.features.generated_features import GeneratedFeatures
+
+
+class ExactMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:ExactMatch"
+
+ with_column: str
+
+ def feature(self, column_name: str) -> GeneratedFeatures:
+ return ExactMatchFeature(columns=[column_name, self.with_column], display_name=self.display_name)
diff --git a/src/evidently/descriptors/is_valid_json_descriptor.py b/src/evidently/descriptors/is_valid_json_descriptor.py
new file mode 100644
index 0000000000..e09202577c
--- /dev/null
+++ b/src/evidently/descriptors/is_valid_json_descriptor.py
@@ -0,0 +1,11 @@
+from evidently.features import is_valid_json_feature
+from evidently.features.generated_features import FeatureDescriptor
+from evidently.features.generated_features import GeneratedFeature
+
+
+class IsValidJSON(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:IsValidJSON"
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return is_valid_json_feature.IsValidJSON(column_name, self.display_name)
diff --git a/src/evidently/descriptors/is_valid_python_descriptor.py b/src/evidently/descriptors/is_valid_python_descriptor.py
new file mode 100644
index 0000000000..48560174d8
--- /dev/null
+++ b/src/evidently/descriptors/is_valid_python_descriptor.py
@@ -0,0 +1,11 @@
+from evidently.features import is_valid_python_feature
+from evidently.features.generated_features import FeatureDescriptor
+from evidently.features.generated_features import GeneratedFeature
+
+
+class IsValidPython(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:IsValidPython"
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return is_valid_python_feature.IsValidPython(column_name, self.display_name)
diff --git a/src/evidently/descriptors/json_match_descriptor.py b/src/evidently/descriptors/json_match_descriptor.py
new file mode 100644
index 0000000000..3d961d9284
--- /dev/null
+++ b/src/evidently/descriptors/json_match_descriptor.py
@@ -0,0 +1,13 @@
+from evidently.features import json_match_feature
+from evidently.features.generated_features import FeatureDescriptor
+from evidently.features.generated_features import GeneratedFeature
+
+
+class JSONMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:JSONMatch"
+
+ with_column: str
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return json_match_feature.JSONMatch(first_column=column_name, second_column=self.with_column)
diff --git a/src/evidently/descriptors/words_descriptor.py b/src/evidently/descriptors/words_descriptor.py
index 14e30060e5..1b96164505 100644
--- a/src/evidently/descriptors/words_descriptor.py
+++ b/src/evidently/descriptors/words_descriptor.py
@@ -39,3 +39,37 @@ def feature(self, column_name: str) -> GeneratedFeature:
self.lemmatize,
self.display_name,
)
+
+
+class WordMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:WordMatch"
+
+ with_column: str
+ mode: str = "any"
+ lemmatize: bool = True
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return words_feature.WordMatch(
+ columns=[column_name, self.with_column],
+ mode=self.mode,
+ lemmatize=self.lemmatize,
+ display_name=self.display_name,
+ )
+
+
+class WordNoMatch(FeatureDescriptor):
+ class Config:
+ type_alias = "evidently:descriptor:WordNoMatch"
+
+ with_column: str
+ mode: str = "any"
+ lemmatize: bool = True
+
+ def feature(self, column_name: str) -> GeneratedFeature:
+ return words_feature.WordNoMatch(
+ columns=[column_name, self.with_column],
+ mode=self.mode,
+ lemmatize=self.lemmatize,
+ display_name=self.display_name,
+ )
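As a quick illustration of how these descriptors forward their configuration to the underlying feature (column names here are made up):

```python
from evidently.descriptors import WordMatch

descriptor = WordMatch(with_column="expected_words", mode="all", lemmatize=False)
feature = descriptor.feature("response")
# feature is words_feature.WordMatch(columns=["response", "expected_words"],
#                                    mode="all", lemmatize=False, display_name=None)
```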
diff --git a/src/evidently/experimental/dataset_generators/llm/prompts.py b/src/evidently/experimental/dataset_generators/llm/prompts.py
index bb38038f57..9070613036 100644
--- a/src/evidently/experimental/dataset_generators/llm/prompts.py
+++ b/src/evidently/experimental/dataset_generators/llm/prompts.py
@@ -31,7 +31,7 @@ class Config:
]
@llm_call
- def generate(self, seed_question: str, number: int) -> List[str]: ...
+ def generate(self, seed_question: str, number: int) -> List[str]: ... # type: ignore[empty-body]
class QuestionsFromContextPromptTemplate(WithSystemPrompt, BlockPromptTemplate):
@@ -41,7 +41,7 @@ class Config:
system_prompt: str = "You are an assistant who generates questions based on provided context"
@llm_call
- def generate_questions(self, context: str, number: int) -> List[str]: ...
+ def generate_questions(self, context: str, number: int) -> List[str]: ... # type: ignore[empty-body]
class NaiveQuestionsFromContextPromptTemplate(QuestionsFromContextPromptTemplate):
diff --git a/src/evidently/features/OOV_words_percentage_feature.py b/src/evidently/features/OOV_words_percentage_feature.py
index 6ab72e6d9a..209c41e6cd 100644
--- a/src/evidently/features/OOV_words_percentage_feature.py
+++ b/src/evidently/features/OOV_words_percentage_feature.py
@@ -26,10 +26,9 @@ class Config:
_eng_words: Set
def __init__(self, column_name: str, ignore_words=(), display_name: Optional[str] = None):
- self.column_name = column_name
self.ignore_words = ignore_words
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
def apply(self, value: Any):
if value is None or (isinstance(value, float) and np.isnan(value)):
diff --git a/src/evidently/features/_registry.py b/src/evidently/features/_registry.py
index b3f579a981..e5c9199968 100644
--- a/src/evidently/features/_registry.py
+++ b/src/evidently/features/_registry.py
@@ -27,6 +27,11 @@
"evidently.features.hf_feature.HuggingFaceToxicityFeature",
"evidently:feature:HuggingFaceToxicityFeature",
)
+register_type_alias(
+ GeneratedFeatures,
+ "evidently.features.is_valid_python_feature.IsValidPython",
+ "evidently:feature:IsValidPython",
+)
register_type_alias(
GeneratedFeatures,
"evidently.features.json_schema_match_feature.JSONSchemaMatch",
@@ -82,6 +87,21 @@
register_type_alias(
GeneratedFeatures, "evidently.features.words_feature.IncludesWords", "evidently:feature:IncludesWords"
)
+register_type_alias(
+ GeneratedFeatures, "evidently.features.words_feature.RowWordPresence", "evidently:feature:RowWordPresence"
+)
+register_type_alias(GeneratedFeatures, "evidently.features.words_feature.WordMatch", "evidently:feature:WordMatch")
+register_type_alias(GeneratedFeatures, "evidently.features.words_feature.WordNoMatch", "evidently:feature:WordNoMatch")
register_type_alias(
GeneratedFeatures, "evidently.features.words_feature.WordsPresence", "evidently:feature:WordsPresence"
)
+register_type_alias(GeneratedFeatures, "evidently.features.json_match_feature.JSONMatch", "evidently:feature:JSONMatch")
+register_type_alias(
+ GeneratedFeatures, "evidently.features.contains_link_feature.ContainsLink", "evidently:feature:ContainsLink"
+)
+register_type_alias(
+ GeneratedFeatures, "evidently.features.exact_match_feature.ExactMatchFeature", "evidently:feature:ExactMatchFeature"
+)
+register_type_alias(
+ GeneratedFeatures, "evidently.features.is_valid_json_feature.IsValidJSON", "evidently:feature:IsValidJSON"
+)
diff --git a/src/evidently/features/contains_link_feature.py b/src/evidently/features/contains_link_feature.py
new file mode 100644
index 0000000000..3d1978363a
--- /dev/null
+++ b/src/evidently/features/contains_link_feature.py
@@ -0,0 +1,36 @@
+from typing import Any
+from typing import ClassVar
+from typing import Optional
+from urllib.parse import urlparse
+
+import numpy as np
+
+from evidently import ColumnType
+from evidently.features.generated_features import ApplyColumnGeneratedFeature
+
+
+class ContainsLink(ApplyColumnGeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:ContainsLink"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ display_name_template: ClassVar = "{column_name} contains link"
+ column_name: str
+
+ def __init__(self, column_name: str, display_name: Optional[str] = None):
+ self.column_name = column_name
+ self.display_name = display_name
+ super().__init__()
+
+ def apply(self, value: Any):
+ if value is None or (isinstance(value, float) and np.isnan(value)):
+ return False
+ # Split the text into words
+ words = str(value).split()
+
+ # Check if any word is a valid URL using urlparse
+ for word in words:
+ parsed = urlparse(word)
+ if parsed.scheme and parsed.netloc:
+ return True
+ return False
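A quick sanity check of the urlparse heuristic in apply(): a token counts as a link only when it has both a scheme and a network location.

```python
from urllib.parse import urlparse

for token in ["https://example.com/docs", "example.com", "ftp://host/file", "hello"]:
    parsed = urlparse(token)
    print(token, bool(parsed.scheme and parsed.netloc))
# https://example.com/docs True
# example.com False   (bare domain: no scheme, so netloc stays empty)
# ftp://host/file True
# hello False
```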
diff --git a/src/evidently/features/exact_match_feature.py b/src/evidently/features/exact_match_feature.py
new file mode 100644
index 0000000000..4f46d43802
--- /dev/null
+++ b/src/evidently/features/exact_match_feature.py
@@ -0,0 +1,29 @@
+from typing import ClassVar
+from typing import List
+
+import pandas as pd
+
+from evidently.base_metric import ColumnName
+from evidently.core import ColumnType
+from evidently.features.generated_features import GeneratedFeature
+from evidently.utils.data_preprocessing import DataDefinition
+
+
+class ExactMatchFeature(GeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:ExactMatchFeature"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ columns: List[str]
+
+ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
+ return pd.DataFrame({self._feature_name(): data[self.columns[0]] == data[self.columns[1]]})
+
+ def _feature_name(self):
+ return "|".join(self.columns)
+
+ def _as_column(self) -> "ColumnName":
+ return self._create_column(
+ self._feature_name(),
+ default_display_name=f"Exact Match for {' '.join(self.columns)}.",
+ )
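The generated column is plain element-wise equality of the two configured columns, for example (toy data):

```python
import pandas as pd

data = pd.DataFrame({"response": ["yes", "no"], "expected": ["yes", "maybe"]})
print((data["response"] == data["expected"]).tolist())  # [True, False]
```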
diff --git a/src/evidently/features/hf_feature.py b/src/evidently/features/hf_feature.py
index e002015458..5c517ceb93 100644
--- a/src/evidently/features/hf_feature.py
+++ b/src/evidently/features/hf_feature.py
@@ -27,8 +27,7 @@ def __init__(self, *, column_name: str, model: str, params: dict, display_name:
self.model = model
self.params = params
self.display_name = display_name
- self.feature_type = _model_type(model)
- super().__init__()
+ super().__init__(feature_type=_model_type(model))
def generate_data(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.Series:
val = _models.get(self.model)
@@ -60,10 +59,9 @@ def __init__(
toxic_label: Optional[str] = None,
):
self.column_name = column_name
- self.display_name = display_name
self.model = model
self.toxic_label = toxic_label
- super().__init__()
+ super().__init__(display_name=display_name)
def generate_data(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.Series:
return _toxicity(self.model, self.toxic_label, data[self.column_name])
diff --git a/src/evidently/features/is_valid_json_feature.py b/src/evidently/features/is_valid_json_feature.py
new file mode 100644
index 0000000000..b8f4289415
--- /dev/null
+++ b/src/evidently/features/is_valid_json_feature.py
@@ -0,0 +1,28 @@
+import json
+from typing import Any
+from typing import ClassVar
+from typing import Optional
+
+from evidently import ColumnType
+from evidently.features.generated_features import ApplyColumnGeneratedFeature
+
+
+class IsValidJSON(ApplyColumnGeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:IsValidJSON"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ display_name_template: ClassVar = "JSON valid for {column_name}"
+ column_name: str
+
+ def __init__(self, column_name: str, display_name: Optional[str] = None):
+ self.column_name = column_name
+ self.display_name = display_name
+ super().__init__()
+
+ def apply(self, value: Any):
+ try:
+ json.loads(value)
+ except ValueError:
+ return False
+ return True
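apply() above relies on json.loads raising ValueError (json.JSONDecodeError is a subclass) for malformed input, for example:

```python
import json

for value in ['{"a": 1}', "[1, 2, 3]", "not json", "{'a': 1}"]:
    try:
        json.loads(value)
        print(value, True)
    except ValueError:
        print(value, False)
# {"a": 1}  True
# [1, 2, 3] True
# not json  False
# {'a': 1}  False  (single quotes are not valid JSON)
```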
diff --git a/src/evidently/features/is_valid_python_feature.py b/src/evidently/features/is_valid_python_feature.py
new file mode 100644
index 0000000000..1c7a18c9de
--- /dev/null
+++ b/src/evidently/features/is_valid_python_feature.py
@@ -0,0 +1,28 @@
+import ast
+from typing import Any
+from typing import ClassVar
+from typing import Optional
+
+from evidently import ColumnType
+from evidently.features.generated_features import ApplyColumnGeneratedFeature
+
+
+class IsValidPython(ApplyColumnGeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:IsValidPython"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ display_name_template: ClassVar = "Valid Python for {column_name}"
+ column_name: str
+
+ def __init__(self, column_name: str, display_name: Optional[str] = None):
+ self.column_name = column_name
+ self.display_name = display_name
+ super().__init__()
+
+ def apply(self, value: Any) -> bool:
+ try:
+ ast.parse(value)
+ return True
+ except (SyntaxError, TypeError):
+ return False
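Note that ast.parse only checks that the text parses as Python; it does not execute anything, so code that would fail at runtime still counts as valid:

```python
import ast


def is_valid_python(value) -> bool:  # mirrors IsValidPython.apply above
    try:
        ast.parse(value)
        return True
    except (SyntaxError, TypeError):  # TypeError covers non-string values such as NaN
        return False


print(is_valid_python("def f(x):\n    return x + 1"))  # True
print(is_valid_python("def f(x) return x"))            # False: syntax error
print(is_valid_python("1 / 0"))                        # True: parses fine, only fails when run
```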
diff --git a/src/evidently/features/json_match_feature.py b/src/evidently/features/json_match_feature.py
new file mode 100644
index 0000000000..77d16dd014
--- /dev/null
+++ b/src/evidently/features/json_match_feature.py
@@ -0,0 +1,46 @@
+import json
+
+import pandas as pd
+
+from evidently import ColumnType
+from evidently.base_metric import ColumnName
+from evidently.features.generated_features import FeatureTypeFieldMixin
+from evidently.features.generated_features import GeneratedFeature
+from evidently.utils.data_preprocessing import DataDefinition
+
+
+class JSONMatch(FeatureTypeFieldMixin, GeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:JSONMatch"
+
+ first_column: str
+ second_column: str
+ feature_type: ColumnType = ColumnType.Categorical
+
+ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
+ def compare_json_objects(first_json_object: str, second_json_object: str) -> bool:
+ try:
+ # Load both JSON strings into dictionaries
+ first_json = json.loads(first_json_object)
+ second_json = json.loads(second_json_object)
+
+ # Compare dictionaries for equality, ignoring order of keys
+ return first_json == second_json
+
+ except (ValueError, TypeError):
+ # Return False if either of the JSONs is invalid
+ return False
+
+ data[self._feature_column_name()] = data.apply(
+ lambda x: compare_json_objects(x[self.first_column], x[self.second_column]), axis=1
+ )
+ return pd.DataFrame(data[self._feature_column_name()])
+
+ def _as_column(self) -> "ColumnName":
+ return self._create_column(
+ self._feature_column_name(),
+ default_display_name=f"JSON match for columns {self.first_column} and {self.second_column}",
+ )
+
+ def _feature_column_name(self):
+ return f"JSON match for {self.first_column} and {self.second_column}"
diff --git a/src/evidently/features/llm_judge.py b/src/evidently/features/llm_judge.py
index 5a193c0853..bd69cb8b72 100644
--- a/src/evidently/features/llm_judge.py
+++ b/src/evidently/features/llm_judge.py
@@ -1,99 +1,42 @@
-import json
-from abc import ABC
from abc import abstractmethod
from enum import Enum
-from typing import Callable
from typing import ClassVar
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
+from typing import Sequence
from typing import Tuple
-from typing import Type
-from typing import Union
import pandas as pd
from evidently import ColumnType
from evidently._pydantic_compat import Field
from evidently._pydantic_compat import PrivateAttr
-from evidently._pydantic_compat import SecretStr
from evidently.base_metric import ColumnName
-from evidently.errors import EvidentlyError
from evidently.features.generated_features import GeneratedFeatures
from evidently.options.base import Options
-from evidently.options.option import Option
from evidently.pydantic_utils import EnumValueMixin
-from evidently.pydantic_utils import EvidentlyBaseModel
from evidently.pydantic_utils import autoregister
from evidently.utils.data_preprocessing import DataDefinition
+from evidently.utils.llm.base import LLMMessage
+from evidently.utils.llm.prompts import PromptBlock
+from evidently.utils.llm.prompts import PromptTemplate
+from evidently.utils.llm.wrapper import LLMRequest
+from evidently.utils.llm.wrapper import LLMWrapper
+from evidently.utils.llm.wrapper import get_llm_wrapper
-LLMMessage = Tuple[str, str]
-LLMResponse = Dict[str, Union[str, float]]
-
-class EvidentlyLLMError(EvidentlyError):
- pass
-
-
-class LLMResponseParseError(EvidentlyLLMError):
- pass
-
-
-class LLMRequestError(EvidentlyLLMError):
- pass
-
-
-class LLMWrapper(ABC):
- __used_options__: ClassVar[List[Type[Option]]] = []
-
- @abstractmethod
- def complete(self, messages: List[LLMMessage]) -> str:
- raise NotImplementedError
-
- def get_used_options(self) -> List[Type[Option]]:
- return self.__used_options__
-
-
-LLMProvider = str
-LLMModel = str
-LLMWrapperProvider = Callable[[LLMModel, Options], LLMWrapper]
-_wrappers: Dict[Tuple[LLMProvider, Optional[LLMModel]], LLMWrapperProvider] = {}
-
-
-def llm_provider(name: LLMProvider, model: Optional[LLMModel]):
- def dec(f: LLMWrapperProvider):
- _wrappers[(name, model)] = f
- return f
-
- return dec
-
-
-def get_llm_wrapper(provider: LLMProvider, model: LLMModel, options: Options) -> LLMWrapper:
- key: Tuple[str, Optional[str]] = (provider, model)
- if key in _wrappers:
- return _wrappers[key](model, options)
- key = (provider, None)
- if key in _wrappers:
- return _wrappers[key](model, options)
- raise ValueError(f"LLM wrapper for provider {provider} model {model} not found")
-
-
-class BaseLLMPromptTemplate(EvidentlyBaseModel, ABC):
+class BaseLLMPromptTemplate(PromptTemplate):
class Config:
is_base_type = True
- @abstractmethod
- def iterate_messages(self, data: pd.DataFrame, input_columns: Dict[str, str]) -> Iterator[LLMMessage]:
- raise NotImplementedError
-
- @abstractmethod
- def get_system_prompts(self) -> List[LLMMessage]:
- raise NotImplementedError
-
- @abstractmethod
- def parse_response(self, response: str) -> LLMResponse:
- raise NotImplementedError
+ def iterate_messages(self, data: pd.DataFrame, input_columns: Dict[str, str]) -> Iterator[LLMRequest[dict]]:
+ template = self.get_template()
+ for _, column_values in data[list(input_columns)].rename(columns=input_columns).iterrows():
+ yield LLMRequest(
+ messages=self.get_messages(column_values, template), response_parser=self.parse, response_type=dict
+ )
@abstractmethod
def list_output_columns(self) -> List[str]:
@@ -103,10 +46,6 @@ def list_output_columns(self) -> List[str]:
def get_type(self, subcolumn: Optional[str]) -> ColumnType:
raise NotImplementedError
- @abstractmethod
- def get_prompt_template(self) -> str:
- raise NotImplementedError
-
class Uncertainty(str, Enum):
UNKNOWN = "unknown"
@@ -119,9 +58,6 @@ class BinaryClassificationPromptTemplate(BaseLLMPromptTemplate, EnumValueMixin):
class Config:
type_alias = "evidently:prompt_template:BinaryClassificationPromptTemplate"
- template: str = (
- """{__criteria__}\n{__task__}\n\n{__as__}\n{{input}}\n{__ae__}\n\n{__instructions__}\n\n{__output_format__}"""
- )
criteria: str = ""
instructions_template: str = (
"Use the following categories for classification:\n{__categories__}\n{__scoring__}\nThink step by step."
@@ -146,32 +82,6 @@ class Config:
pre_messages: List[LLMMessage] = Field(default_factory=list)
- def iterate_messages(self, data: pd.DataFrame, input_columns: Dict[str, str]) -> Iterator[LLMMessage]:
- prompt_template = self.get_prompt_template()
- for _, column_values in data[list(input_columns)].rename(columns=input_columns).iterrows():
- yield "user", prompt_template.format(**dict(column_values))
-
- def get_prompt_template(self) -> str:
- values = {
- "__criteria__": self._criteria(),
- "__task__": self._task(),
- "__instructions__": self._instructions(),
- "__output_format__": self._output_format(),
- "__as__": self.anchor_start,
- "__ae__": self.anchor_end,
- **self.placeholders,
- }
- return self.template.format(**values)
-
- def _task(self):
- return (
- f"Classify text between {self.anchor_start} and {self.anchor_end} "
- f"into two categories: {self.target_category} and {self.non_target_category}."
- )
-
- def _criteria(self):
- return self.criteria
-
def _instructions(self):
categories = (
(
@@ -203,30 +113,30 @@ def _uncertainty_class(self):
return self.target_category
raise ValueError(f"Unknown uncertainty value: {self.uncertainty}")
- def _output_format(self):
- values = []
- columns = {}
+ def get_blocks(self) -> Sequence[PromptBlock]:
+ fields = {}
if self.include_category:
cat = f"{self.target_category} or {self.non_target_category}"
if self.uncertainty == Uncertainty.UNKNOWN:
cat += " or UNKNOWN"
- columns[self.output_column] = f'"{cat}"'
- values.append("category")
+ fields["category"] = (cat, self.output_column)
if self.include_score:
- columns[self.output_score_column] = ""
- values.append("score")
+ fields["score"] = ("", self.output_score_column)
if self.include_reasoning:
- columns[self.output_reasoning_column] = '""'
- values.append("reasoning")
-
- keys = "\n".join(f'"{k}": {v}' for k, v in columns.items())
- return f"Return {', '.join(values)} formatted as json without formatting as follows:\n{{{{\n{keys}\n}}}}"
+ fields["reasoning"] = ('""', self.output_reasoning_column)
+ return [
+ PromptBlock.simple(self.criteria),
+ PromptBlock.simple(
+ f"Classify text between {self.anchor_start} and {self.anchor_end} "
+ f"into two categories: {self.target_category} and {self.non_target_category}."
+ ),
+ PromptBlock.input().anchored(self.anchor_start, self.anchor_end),
+ PromptBlock.simple(self._instructions()),
+ PromptBlock.json_output(**fields),
+ ]
- def parse_response(self, response: str) -> LLMResponse:
- try:
- return json.loads(response)
- except json.JSONDecodeError as e:
- raise LLMResponseParseError(f"Failed to parse response '{response}' as json") from e
+ def get_messages(self, values, template: Optional[str] = None) -> List[LLMMessage]:
+ return [*self.pre_messages, *super().get_messages(values)]
def list_output_columns(self) -> List[str]:
result = []
@@ -247,9 +157,6 @@ def get_type(self, subcolumn: Optional[str]) -> ColumnType:
return ColumnType.Categorical
raise ValueError(f"Unknown subcolumn {subcolumn}")
- def get_system_prompts(self) -> List[LLMMessage]:
- return self.pre_messages
-
class LLMJudge(GeneratedFeatures):
class Config:
@@ -281,12 +188,10 @@ def get_input_columns(self):
return {self.input_column: self.DEFAULT_INPUT_COLUMN}
def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:
- result: List[Dict[str, Union[str, float]]] = []
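+ # render one prompt per row and run them through the LLM wrapper as a single synchronous batch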
+ result = self.get_llm_wrapper(options).run_batch_sync(
+ requests=self.template.iterate_messages(data, self.get_input_columns())
+ )
- for message in self.template.iterate_messages(data, self.get_input_columns()):
- messages: List[LLMMessage] = [*self.template.get_system_prompts(), message]
- response = self.get_llm_wrapper(options).complete(messages)
- result.append(self.template.parse_response(response))
return pd.DataFrame(result)
def list_columns(self) -> List["ColumnName"]:
@@ -300,50 +205,3 @@ def get_type(self, subcolumn: Optional[str] = None) -> ColumnType:
subcolumn = self._extract_subcolumn_name(subcolumn)
return self.template.get_type(subcolumn)
-
-
-class OpenAIKey(Option):
- api_key: Optional[SecretStr] = None
-
- def __init__(self, api_key: Optional[str] = None):
- self.api_key = SecretStr(api_key) if api_key is not None else None
- super().__init__()
-
- def get_value(self) -> Optional[str]:
- if self.api_key is None:
- return None
- return self.api_key.get_secret_value()
-
-
-@llm_provider("openai", None)
-class OpenAIWrapper(LLMWrapper):
- __used_options__: ClassVar = [OpenAIKey]
-
- def __init__(self, model: str, options: Options):
- import openai
-
- self.model = model
- self.client = openai.OpenAI(api_key=options.get(OpenAIKey).get_value())
-
- def complete(self, messages: List[LLMMessage]) -> str:
- import openai
-
- messages = [{"role": user, "content": msg} for user, msg in messages]
- try:
- response = self.client.chat.completions.create(model=self.model, messages=messages) # type: ignore[arg-type]
- except openai.OpenAIError as e:
- raise LLMRequestError("Failed to call OpenAI complete API") from e
- content = response.choices[0].message.content
- assert content is not None # todo: better error
- return content
-
-
-@llm_provider("litellm", None)
-class LiteLLMWrapper(LLMWrapper):
- def __init__(self, model: str):
- self.model = model
-
- def complete(self, messages: List[LLMMessage]) -> str:
- from litellm import completion
-
- return completion(model=self.model, messages=messages).choices[0].message.content
diff --git a/src/evidently/features/non_letter_character_percentage_feature.py b/src/evidently/features/non_letter_character_percentage_feature.py
index 4ce38ced52..8b0d1aa8de 100644
--- a/src/evidently/features/non_letter_character_percentage_feature.py
+++ b/src/evidently/features/non_letter_character_percentage_feature.py
@@ -17,9 +17,8 @@ class Config:
column_name: str
def __init__(self, column_name: str, display_name: Optional[str] = None):
- self.column_name = column_name
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
def apply(self, value: Any):
"""counts share of characters that are not letters or spaces"""
diff --git a/src/evidently/features/openai_feature.py b/src/evidently/features/openai_feature.py
index 11100a4ac7..6e79436911 100644
--- a/src/evidently/features/openai_feature.py
+++ b/src/evidently/features/openai_feature.py
@@ -56,12 +56,11 @@ def __init__(
self.context_replace_string = context_replace_string
self.openai_params = openai_params or {}
self.model = model
- self.feature_type = ColumnType.Categorical if feature_type == "cat" else ColumnType.Numerical
self.column_name = column_name
self.display_name = display_name
self.check_mode = check_mode
self.possible_values = [v.lower() for v in possible_values] if possible_values else None
- super().__init__()
+ super().__init__(feature_type=ColumnType.Categorical if feature_type == "cat" else ColumnType.Numerical)
def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
from openai import OpenAI
diff --git a/src/evidently/features/sentence_count_feature.py b/src/evidently/features/sentence_count_feature.py
index 6b4215cae8..870657b5d8 100644
--- a/src/evidently/features/sentence_count_feature.py
+++ b/src/evidently/features/sentence_count_feature.py
@@ -19,9 +19,8 @@ class Config:
column_name: str
def __init__(self, column_name: str, display_name: Optional[str] = None):
- self.column_name = column_name
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
def apply(self, value: Any):
if value is None or (isinstance(value, float) and np.isnan(value)):
diff --git a/src/evidently/features/sentiment_feature.py b/src/evidently/features/sentiment_feature.py
index 8226ac2e2f..f654c3fdfb 100644
--- a/src/evidently/features/sentiment_feature.py
+++ b/src/evidently/features/sentiment_feature.py
@@ -21,9 +21,8 @@ class Config:
_sid: Optional[SentimentIntensityAnalyzer] = PrivateAttr(None)
def __init__(self, column_name: str, display_name: Optional[str] = None):
- self.column_name = column_name
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
@property
def sid(self):
diff --git a/src/evidently/features/text_length_feature.py b/src/evidently/features/text_length_feature.py
index 68a5bb1056..d350a2d9c6 100644
--- a/src/evidently/features/text_length_feature.py
+++ b/src/evidently/features/text_length_feature.py
@@ -17,9 +17,8 @@ class Config:
column_name: str
def __init__(self, column_name: str, display_name: Optional[str] = None):
- self.column_name = column_name
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
def apply(self, value: Any):
if value is None or (isinstance(value, float) and np.isnan(value)):
diff --git a/src/evidently/features/text_part_feature.py b/src/evidently/features/text_part_feature.py
index e3a86d5e68..dd03481b95 100644
--- a/src/evidently/features/text_part_feature.py
+++ b/src/evidently/features/text_part_feature.py
@@ -35,12 +35,12 @@ def _feature_column_name(self) -> str:
return f"{self.column_name}.{self.prefix}.{self.case_sensitive}"
def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
- data = data[self.column_name]
+ column_data = data[self.column_name]
substr = self.prefix
if not self.case_sensitive:
- data = data.str.casefold()
+ column_data = column_data.str.casefold()
substr = substr.casefold()
- calculated = data.str.startswith(substr)
+ calculated = column_data.str.startswith(substr)
return pd.DataFrame({self._feature_column_name(): calculated})
def _as_column(self) -> ColumnName:
@@ -76,12 +76,12 @@ def _feature_column_name(self) -> str:
return f"{self.column_name}.{self.suffix}.{self.case_sensitive}"
def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
- data = data[self.column_name]
+ column_data = data[self.column_name]
substr = self.suffix
if not self.case_sensitive:
- data = data.str.casefold()
+ column_data = column_data.str.casefold()
substr = substr.casefold()
- calculated = data.str.endswith(substr)
+ calculated = column_data.str.endswith(substr)
return pd.DataFrame({self._feature_column_name(): calculated})
def _as_column(self) -> ColumnName:
diff --git a/src/evidently/features/trigger_words_presence_feature.py b/src/evidently/features/trigger_words_presence_feature.py
index 933cabcb55..675b7bd23c 100644
--- a/src/evidently/features/trigger_words_presence_feature.py
+++ b/src/evidently/features/trigger_words_presence_feature.py
@@ -29,11 +29,10 @@ def __init__(
lemmatize: bool = True,
display_name: Optional[str] = None,
):
- self.column_name = column_name
self.words_list = words_list
self.lemmatize = lemmatize
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
@property
def lem(self):
diff --git a/src/evidently/features/word_count_feature.py b/src/evidently/features/word_count_feature.py
index 9154412fed..a36b4bfe0f 100644
--- a/src/evidently/features/word_count_feature.py
+++ b/src/evidently/features/word_count_feature.py
@@ -19,9 +19,8 @@ class Config:
column_name: str
def __init__(self, column_name: str, display_name: Optional[str] = None):
- self.column_name = column_name
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
def apply(self, value: Any):
if value is None or (isinstance(value, float) and np.isnan(value)):
diff --git a/src/evidently/features/words_feature.py b/src/evidently/features/words_feature.py
index a85131585c..72526abf9a 100644
--- a/src/evidently/features/words_feature.py
+++ b/src/evidently/features/words_feature.py
@@ -5,11 +5,15 @@
from typing import Optional
import numpy as np
+import pandas as pd
from nltk.stem.wordnet import WordNetLemmatizer
from evidently._pydantic_compat import PrivateAttr
+from evidently.base_metric import ColumnName
from evidently.core import ColumnType
from evidently.features.generated_features import ApplyColumnGeneratedFeature
+from evidently.features.generated_features import GeneratedFeature
+from evidently.utils.data_preprocessing import DataDefinition
def _listed_words_present(
@@ -59,14 +63,13 @@ def __init__(
lemmatize: bool = True,
display_name: Optional[str] = None,
):
- self.column_name = column_name
self.words_list = words_list
if mode not in ["includes_any", "includes_all", "excludes_any", "excludes_all"]:
raise ValueError("mode must be either 'includes_any', 'includes_all', 'excludes_any' or 'excludes_all'")
self.mode = mode
self.lemmatize = lemmatize
self.display_name = display_name
- super().__init__()
+ super().__init__(column_name=column_name)
@property
def lem(self):
@@ -127,3 +130,83 @@ def _feature_display_name(self):
f"Text Excludes {self.mode} words [{self.words_list}],"
f" lemmatize: {self.lemmatize}] for {self.column_name}"
)
+
+
+class RowWordPresence(GeneratedFeature):
+ class Config:
+ type_alias = "evidently:feature:RowWordPresence"
+
+ __feature_type__: ClassVar = ColumnType.Categorical
+ columns: List[str]
+ mode: str = "any"
+ lemmatize: bool = True
+ _lem: Optional[WordNetLemmatizer] = PrivateAttr(None)
+
+ def __init__(self, columns: List[str], mode: str, lemmatize: bool, display_name: Optional[str] = None):
+ self.columns = columns
+ self.mode = mode
+ self.lemmatize = lemmatize
+ self.display_name = display_name
+ super().__init__()
+
+ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
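+ # columns[0] holds the generated text to check; columns[1] holds the per-row words to match against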
+ generated_col = data[self.columns[0]].fillna("")
+ expected_col = data[self.columns[1]].fillna("")
+
+ return pd.DataFrame(
+ {
+ self._feature_name(): pd.Series(
+ [
+ _listed_words_present(
+ in_str=generated_str,
+ mode=self.mode,
+ lemmatize=self.lemmatize,
+ words_list=expected_words,
+ lem=self.lem,
+ )
+ for generated_str, expected_words in zip(generated_col, expected_col)
+ ],
+ index=data.index,
+ )
+ }
+ )
+
+ @property
+ def lem(self):
+ if self._lem is None:
+ import nltk
+
+ nltk.download("wordnet", quiet=True)
+ self._lem = WordNetLemmatizer()
+ return self._lem
+
+ def _feature_name(self):
+ return "_".join(["RowWordPresence", self.columns[0], self.columns[1], str(self.lemmatize), str(self.mode)])
+
+
+class WordMatch(RowWordPresence):
+ class Config:
+ type_alias = "evidently:feature:WordMatch"
+
+ def __init__(self, columns: List[str], mode: str, lemmatize: bool, display_name: Optional[str] = None):
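+ # the public mode is "any"/"all"; prefix it with "includes_" for the shared RowWordPresence check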
+ super().__init__(columns=columns, mode="includes_" + mode, lemmatize=lemmatize, display_name=display_name)
+
+ def _as_column(self) -> "ColumnName":
+ return self._create_column(
+ self._feature_name(),
+ default_display_name=f"Text contains {self.mode.split('_')[1]} defined words",
+ )
+
+
+class WordNoMatch(RowWordPresence):
+ class Config:
+ type_alias = "evidently:feature:WordNoMatch"
+
+ def __init__(self, columns: List[str], mode: str, lemmatize: bool, display_name: Optional[str] = None):
+ super().__init__(columns=columns, mode="excludes_" + mode, lemmatize=lemmatize, display_name=display_name)
+
+ def _as_column(self) -> "ColumnName":
+ return self._create_column(
+ self._feature_name(),
+ default_display_name=f"Text does not contain {self.mode.split('_')[1]} defined words",
+ )
diff --git a/src/evidently/metric_results.py b/src/evidently/metric_results.py
index fa614e48ed..a89b856bb3 100644
--- a/src/evidently/metric_results.py
+++ b/src/evidently/metric_results.py
@@ -236,9 +236,9 @@ def df_from_column_scatter(value: ColumnScatter) -> pd.DataFrame:
def column_scatter_from_df(df: Optional[pd.DataFrame], with_index: bool) -> Optional[ColumnScatter]:
if df is None:
return None
- data = {column: df[column] for column in df.columns}
+ data: ColumnScatter = {column: df[column] for column in df.columns}
if with_index:
- data["index"] = df.index
+ data["index"] = df.index.to_series()
return data
@@ -291,8 +291,9 @@ class Config:
type_alias = "evidently:metric_result:ColumnAggScatterResult"
field_tags = {"current": {IncludeTags.Current}, "reference": {IncludeTags.Reference}}
- current: ColumnAggScatter
- reference: Optional[ColumnAggScatter]
+ # TODO: fix type collision with super type
+ current: ColumnAggScatter # type: ignore[assignment]
+ reference: Optional[ColumnAggScatter] # type: ignore[assignment]
PlotData = List[float]
diff --git a/src/evidently/metrics/classification_performance/class_separation_metric.py b/src/evidently/metrics/classification_performance/class_separation_metric.py
index 485989bd0d..d19653be5c 100644
--- a/src/evidently/metrics/classification_performance/class_separation_metric.py
+++ b/src/evidently/metrics/classification_performance/class_separation_metric.py
@@ -53,8 +53,8 @@ def prepare_box_data(df: pd.DataFrame, target_name: str, prediction_names: List[
for name in prediction_names:
df_name = df.copy()
df_name[target_name] = (df_name[target_name] == name).astype(int)
- df_for_plot = df_name.groupby(target_name)[name].quantile([0, 0.25, 0.5, 0.75, 1]).reset_index()
- df_for_plot.columns = [target_name, "q", name]
+ df_for_plot = df_name.groupby(target_name)[name].quantile(np.array([0, 0.25, 0.5, 0.75, 1])).reset_index()
+ df_for_plot.columns = pd.Index([target_name, "q", name])
res_df = pd.DataFrame()
values = df_for_plot[target_name].unique()
@@ -108,9 +108,9 @@ def calculate(self, data: InputData) -> ClassificationClassSeparationPlotResults
reference=column_scatter_from_df(reference_plot, True),
target_name=target_name,
)
- current_plot = prepare_box_data(current_plot, target_name, prediction_names)
+ current_plot = prepare_box_data(current_plot, target_name, prediction_names.tolist())
if reference_plot is not None:
- reference_plot = prepare_box_data(reference_plot, target_name, prediction_names)
+ reference_plot = prepare_box_data(reference_plot, target_name, prediction_names.tolist())
return ClassificationClassSeparationPlotResults(
current=current_plot,
reference=reference_plot,
diff --git a/src/evidently/metrics/classification_performance/classification_dummy_metric.py b/src/evidently/metrics/classification_performance/classification_dummy_metric.py
index a8ca44f0cb..cdf9452b1a 100644
--- a/src/evidently/metrics/classification_performance/classification_dummy_metric.py
+++ b/src/evidently/metrics/classification_performance/classification_dummy_metric.py
@@ -76,8 +76,8 @@ def calculate(self, data: InputData) -> ClassificationDummyMetricResults:
# dummy by current
labels_ratio = data.current_data[target_name].value_counts(normalize=True)
np.random.seed(0)
- dummy_preds = np.random.choice(labels_ratio.index, data.current_data.shape[0], p=labels_ratio)
- dummy_preds = pd.Series(dummy_preds)
+ dummy_preds_choices = np.random.choice(labels_ratio.index, data.current_data.shape[0], p=labels_ratio)
+ dummy_preds = pd.Series(dummy_preds_choices)
prediction: Optional[PredictionData] = None
if prediction_name is not None:
@@ -147,7 +147,7 @@ def calculate(self, data: InputData) -> ClassificationDummyMetricResults:
}
if prediction is not None and prediction.prediction_probas is not None:
# dummy log_loss and roc_auc
- binaraized_target = (target.astype(str).values.reshape(-1, 1) == list(labels)).astype(int)
+ binaraized_target = (target.astype(str).to_numpy().reshape(-1, 1) == list(labels)).astype(int)
dummy_prediction = np.full(
prediction.prediction_probas.shape,
1 / prediction.prediction_probas.shape[1],
@@ -161,8 +161,8 @@ def calculate(self, data: InputData) -> ClassificationDummyMetricResults:
if data.reference_data is not None:
labels_ratio = data.reference_data[target_name].value_counts(normalize=True)
np.random.seed(1)
- dummy_preds = np.random.choice(labels_ratio.index, data.current_data.shape[0], p=labels_ratio)
- dummy_preds = pd.Series(dummy_preds)
+ dummy_preds_choices = np.random.choice(labels_ratio.index, data.current_data.shape[0], p=labels_ratio)
+ dummy_preds = pd.Series(dummy_preds_choices)
if prediction_name is not None:
target, prediction = self.get_target_prediction_data(data.current_data, data.column_mapping)
@@ -192,7 +192,7 @@ def calculate(self, data: InputData) -> ClassificationDummyMetricResults:
)
if prediction is not None and prediction.prediction_probas is not None:
# dummy log_loss and roc_auc
- binaraized_target = (target.astype(str).values.reshape(-1, 1) == list(labels)).astype(int)
+ binaraized_target = (target.astype(str).to_numpy().reshape(-1, 1) == list(labels)).astype(int)
dummy_prediction = np.full(
prediction.prediction_probas.shape,
1 / prediction.prediction_probas.shape[1],
@@ -298,5 +298,9 @@ def render_html(self, obj: ClassificationDummyMetric) -> List[BaseWidgetInfo]:
return [
header_text(label="Dummy Classification Quality"),
- table_data(column_names=columns, data=np.around(in_table_data, 3).values, title=""),
+ table_data(
+ column_names=columns,
+ data=np.around(in_table_data, 3).values, # type: ignore[attr-defined]
+ title="",
+ ),
]
diff --git a/src/evidently/metrics/classification_performance/lift_curve_metric.py b/src/evidently/metrics/classification_performance/lift_curve_metric.py
index 6c17a3a136..e5bb02915d 100644
--- a/src/evidently/metrics/classification_performance/lift_curve_metric.py
+++ b/src/evidently/metrics/classification_performance/lift_curve_metric.py
@@ -1,3 +1,5 @@
+from typing import Any
+from typing import Dict
from typing import List
from typing import Optional
@@ -9,6 +11,7 @@
from evidently.calculations.classification_performance import calculate_lift_table
from evidently.calculations.classification_performance import get_prediction_data
from evidently.core import IncludeTags
+from evidently.metric_results import Label
from evidently.metric_results import LiftCurve
from evidently.metric_results import LiftCurveData
from evidently.metric_results import PredictionData
@@ -62,9 +65,9 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
labels = prediction.labels
if prediction.prediction_probas is None:
raise ValueError("Lift Curve can be calculated only " "on binary probabilistic predictions")
- binaraized_target = (target_data.values.reshape(-1, 1) == labels).astype(int)
- lift_curve = {}
- lift_table = {}
+ binaraized_target = (target_data.to_numpy().reshape(-1, 1) == labels).astype(int)
+ lift_curve: LiftCurve = {}
+ lift_table: Dict[Label, Any] = {}
if len(labels) <= 2:
binaraized_target = pd.DataFrame(binaraized_target[:, 0])
binaraized_target.columns = ["target"]
@@ -90,7 +93,6 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
max_lift=[i[9] for i in lift_table[prediction.prediction_probas.columns[0]]],
relative_lift=[i[10] for i in lift_table[prediction.prediction_probas.columns[0]]],
percent=[i[11] for i in lift_table[prediction.prediction_probas.columns[0]]],
- # percent = lift_table[prediction.prediction_probas.columns[0]][0][11],
)
else:
binaraized_target = pd.DataFrame(binaraized_target)
@@ -106,7 +108,6 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
lift_table[label] = calculate_lift_table(binded)
for label in labels:
- # lift_curve[int(prediction.prediction_probas.columns[0])] = LiftCurveData(
lift_curve[label] = LiftCurveData(
lift=[i[8] for i in lift_table[prediction.prediction_probas.columns[0]]],
top=[i[0] for i in lift_table[prediction.prediction_probas.columns[0]]],
@@ -120,7 +121,6 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
max_lift=[i[9] for i in lift_table[prediction.prediction_probas.columns[0]]],
relative_lift=[i[10] for i in lift_table[prediction.prediction_probas.columns[0]]],
percent=[i[11] for i in lift_table[prediction.prediction_probas.columns[0]]],
- # percent = lift_table[prediction.prediction_probas.columns[0]][0][11],
)
return lift_curve
diff --git a/src/evidently/metrics/classification_performance/lift_table_metric.py b/src/evidently/metrics/classification_performance/lift_table_metric.py
index e9dc86391c..8a7edb3f64 100644
--- a/src/evidently/metrics/classification_performance/lift_table_metric.py
+++ b/src/evidently/metrics/classification_performance/lift_table_metric.py
@@ -105,7 +105,7 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData):
labels = prediction.labels
if prediction.prediction_probas is None:
raise ValueError("Lift Table can be calculated only on " "binary probabilistic predictions")
- binaraized_target = (target_data.values.reshape(-1, 1) == labels).astype(int)
+ binaraized_target = (target_data.to_numpy().reshape(-1, 1) == labels).astype(int)
lift_table = {}
if len(labels) <= 2:
binaraized_target = pd.DataFrame(binaraized_target[:, 0])
diff --git a/src/evidently/metrics/classification_performance/pr_curve_metric.py b/src/evidently/metrics/classification_performance/pr_curve_metric.py
index b2537bc6fe..ad86e57161 100644
--- a/src/evidently/metrics/classification_performance/pr_curve_metric.py
+++ b/src/evidently/metrics/classification_performance/pr_curve_metric.py
@@ -58,8 +58,8 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
labels = prediction.labels
if prediction.prediction_probas is None:
raise ValueError("PR Curve can be calculated only on binary probabilistic predictions")
- binaraized_target = (target_data.values.reshape(-1, 1) == labels).astype(int)
- pr_curve = {}
+ binaraized_target = (target_data.to_numpy().reshape(-1, 1) == labels).astype(int)
+ pr_curve: PRCurve = {}
if len(labels) <= 2:
binaraized_target = pd.DataFrame(binaraized_target[:, 0])
binaraized_target.columns = ["target"]
diff --git a/src/evidently/metrics/classification_performance/pr_table_metric.py b/src/evidently/metrics/classification_performance/pr_table_metric.py
index a12e26eed0..c9cee3b4ce 100644
--- a/src/evidently/metrics/classification_performance/pr_table_metric.py
+++ b/src/evidently/metrics/classification_performance/pr_table_metric.py
@@ -74,12 +74,12 @@ def calculate(self, data: InputData) -> ClassificationPRTableResults:
reference=ref_pr_table,
)
- def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData):
+ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData) -> PRTable:
labels = prediction.labels
if prediction.prediction_probas is None:
raise ValueError("PR Table can be calculated only on binary probabilistic predictions")
- binaraized_target = (target_data.values.reshape(-1, 1) == labels).astype(int)
- pr_table = {}
+ binaraized_target = (target_data.to_numpy().reshape(-1, 1) == labels).astype(int)
+ pr_table: PRTable = {}
if len(labels) <= 2:
binaraized_target = pd.DataFrame(binaraized_target[:, 0])
binaraized_target.columns = ["target"]
diff --git a/src/evidently/metrics/classification_performance/quality_by_class_metric.py b/src/evidently/metrics/classification_performance/quality_by_class_metric.py
index 9bf73c2fe2..70eb5bd99a 100644
--- a/src/evidently/metrics/classification_performance/quality_by_class_metric.py
+++ b/src/evidently/metrics/classification_performance/quality_by_class_metric.py
@@ -90,7 +90,9 @@ def calculate(self, data: InputData) -> ClassificationQualityByClassResult:
current_roc_aucs = None
if prediction.prediction_probas is not None:
- binaraized_target = (target.values.reshape(-1, 1) == list(prediction.prediction_probas.columns)).astype(int)
+ binaraized_target = (target.to_numpy().reshape(-1, 1) == list(prediction.prediction_probas.columns)).astype(
+ int
+ )
current_roc_aucs = sklearn.metrics.roc_auc_score(
binaraized_target, prediction.prediction_probas, average=None
).tolist()
@@ -108,7 +110,7 @@ def calculate(self, data: InputData) -> ClassificationQualityByClassResult:
).classes
if ref_prediction.prediction_probas is not None:
binaraized_target = (
- ref_target.values.reshape(-1, 1) == list(ref_prediction.prediction_probas.columns)
+ ref_target.to_numpy().reshape(-1, 1) == list(ref_prediction.prediction_probas.columns)
).astype(int)
reference_roc_aucs = sklearn.metrics.roc_auc_score(
binaraized_target, ref_prediction.prediction_probas, average=None
diff --git a/src/evidently/metrics/classification_performance/quality_by_feature_table.py b/src/evidently/metrics/classification_performance/quality_by_feature_table.py
index c89235cff4..fbbab8bf88 100644
--- a/src/evidently/metrics/classification_performance/quality_by_feature_table.py
+++ b/src/evidently/metrics/classification_performance/quality_by_feature_table.py
@@ -117,7 +117,7 @@ def calculate(self, data: InputData) -> ClassificationQualityByFeatureTableResul
curr_predictions = get_prediction_data(data.current_data, dataset_columns, data.column_mapping.pos_label)
ref_predictions = None
if ref_df is not None:
- ref_predictions = get_prediction_data(data.reference_data, dataset_columns, data.column_mapping.pos_label)
+ ref_predictions = get_prediction_data(ref_df, dataset_columns, data.column_mapping.pos_label)
if self.columns is None:
columns = (
dataset_columns.num_feature_names
@@ -143,7 +143,7 @@ def calculate(self, data: InputData) -> ClassificationQualityByFeatureTableResul
columns.remove(column)
columns += list(features.keys())
curr_text_df = pd.concat([data.get_current_column(x.as_column()) for x in features.values()], axis=1)
- curr_text_df.columns = list(features.keys())
+ curr_text_df.columns = pd.Index(list(features.keys()))
curr_df = pd.concat([curr_df.reset_index(drop=True), curr_text_df.reset_index(drop=True)], axis=1)
if ref_df is not None:
@@ -151,7 +151,7 @@ def calculate(self, data: InputData) -> ClassificationQualityByFeatureTableResul
[data.get_reference_column(x.as_column()) for x in features.values()],
axis=1,
)
- ref_text_df.columns = list(features.keys())
+ ref_text_df.columns = pd.Index(list(features.keys()))
ref_df = pd.concat([ref_df.reset_index(drop=True), ref_text_df.reset_index(drop=True)], axis=1)
table_columns = set(columns + [target_name])
diff --git a/src/evidently/metrics/classification_performance/roc_curve_metric.py b/src/evidently/metrics/classification_performance/roc_curve_metric.py
index 4850a7aae7..cb8000e5c2 100644
--- a/src/evidently/metrics/classification_performance/roc_curve_metric.py
+++ b/src/evidently/metrics/classification_performance/roc_curve_metric.py
@@ -60,8 +60,8 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
labels = prediction.labels
if prediction.prediction_probas is None:
raise ValueError("Roc Curve can be calculated only on binary probabilistic predictions")
- binaraized_target = (target_data.values.reshape(-1, 1) == labels).astype(int)
- roc_curve = {}
+ binaraized_target = (target_data.to_numpy().reshape(-1, 1) == labels).astype(int)
+ roc_curve: ROCCurve = {}
if len(labels) <= 2:
binaraized_target = pd.DataFrame(binaraized_target[:, 0])
binaraized_target.columns = ["target"]
diff --git a/src/evidently/metrics/data_drift/column_drift_metric.py b/src/evidently/metrics/data_drift/column_drift_metric.py
index 097ad41cfc..53788873c8 100644
--- a/src/evidently/metrics/data_drift/column_drift_metric.py
+++ b/src/evidently/metrics/data_drift/column_drift_metric.py
@@ -144,12 +144,12 @@ def get_one_column_drift(
column.name: current_feature_data.values,
"Timestamp": None if datetime_data is None else datetime_data.values,
},
- index=index_data.values,
+ index=index_data.values, # type: ignore[arg-type]
),
column.name,
datetime_name,
)
- current_scatter["current (mean)"] = df
+ current_scatter["current (mean)"] = df # type: ignore[assignment]
if prefix is None:
x_name = "Index binned"
else:
@@ -281,7 +281,8 @@ def calculate(self, data: InputData) -> ColumnDataDriftMetrics:
reference_feature_data = data.get_reference_column(self.column_name)
except ColumnNotFound as ex:
raise ValueError(f"Cannot find column '{ex.column_name}' in reference dataset")
-
+ if reference_feature_data is None:
+ raise ValueError(f"Cannot find column '{self.column_name.display_name}' in reference dataset")
column_type = ColumnType.Numerical
if self.column_name.is_main_dataset():
column_type = data.data_definition.get_column(self.column_name.name).column_type
@@ -299,7 +300,7 @@ def calculate(self, data: InputData) -> ColumnDataDriftMetrics:
current_feature_data=current_feature_data,
reference_feature_data=reference_feature_data,
column=self.column_name,
- index_data=data.current_data.index,
+ index_data=data.current_data.index.to_series(),
column_type=column_type,
datetime_data=data.current_data[datetime_column.column_name] if datetime_column else None,
data_definition=data.data_definition,
@@ -338,9 +339,11 @@ def render_html(self, obj: ColumnDriftMetric) -> List[BaseWidgetInfo]:
# fig_json = fig.to_plotly_json()
if result.scatter is not None:
if obj.get_options().render_options.raw_data:
+ if not isinstance(result.scatter, ScatterField):
+ raise ValueError("Result have incompatible type")
scatter_fig = plot_scatter_for_data_drift(
- curr_y=result.scatter.scatter[result.column_name],
- curr_x=result.scatter.scatter[result.scatter.x_name],
+ curr_y=result.scatter.scatter[result.column_name].tolist(),
+ curr_x=result.scatter.scatter[result.scatter.x_name].tolist(),
y0=result.scatter.plot_shape["y0"],
y1=result.scatter.plot_shape["y1"],
y_name=result.column_name,
diff --git a/src/evidently/metrics/data_drift/data_drift_table.py b/src/evidently/metrics/data_drift/data_drift_table.py
index f611bdebc7..819f464565 100644
--- a/src/evidently/metrics/data_drift/data_drift_table.py
+++ b/src/evidently/metrics/data_drift/data_drift_table.py
@@ -13,6 +13,7 @@
from evidently.core import IncludeTags
from evidently.metric_results import DatasetColumns
from evidently.metric_results import HistogramData
+from evidently.metric_results import ScatterField
from evidently.metrics.data_drift.base import WithDriftOptions
from evidently.metrics.data_drift.feature_importance import FeatureImportanceMetric
from evidently.model.widget import BaseWidgetInfo
@@ -221,9 +222,11 @@ def _generate_column_params(
data_drift = "Detected" if data.drift_detected else "Not Detected"
if data.column_type == "num" and data.scatter is not None:
if not agg_data:
+ if not isinstance(data.scatter, ScatterField):
+ raise ValueError("data.scatter has incompatible type")
scatter_fig = plot_scatter_for_data_drift(
- curr_y=data.scatter.scatter[data.column_name],
- curr_x=data.scatter.scatter[data.scatter.x_name],
+ curr_y=data.scatter.scatter[data.column_name].tolist(),
+ curr_x=data.scatter.scatter[data.scatter.x_name].tolist(),
y0=data.scatter.plot_shape["y0"],
y1=data.scatter.plot_shape["y1"],
y_name=data.column_name,
diff --git a/src/evidently/metrics/data_drift/feature_importance.py b/src/evidently/metrics/data_drift/feature_importance.py
index 028881fe99..2fd7545712 100644
--- a/src/evidently/metrics/data_drift/feature_importance.py
+++ b/src/evidently/metrics/data_drift/feature_importance.py
@@ -72,9 +72,9 @@ def get_feature_importance_from_samples(
for col in [x.column_name for x in cat_cols]:
enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
- curr_sampled_data[col] = enc.fit_transform(curr_sampled_data[col].astype(str).values.reshape(-1, 1))
+ curr_sampled_data[col] = enc.fit_transform(curr_sampled_data[col].astype(str).to_numpy().reshape(-1, 1))
if ref_sampled_data is not None:
- ref_sampled_data[col] = enc.fit_transform(ref_sampled_data[col].astype(str).values.reshape(-1, 1))
+ ref_sampled_data[col] = enc.fit_transform(ref_sampled_data[col].astype(str).to_numpy().reshape(-1, 1))
task = data_definition.task
target_column = data_definition.get_target_column()
diff --git a/src/evidently/metrics/data_drift/target_by_features_table.py b/src/evidently/metrics/data_drift/target_by_features_table.py
index af8d7cd9c4..9049fe3c3f 100644
--- a/src/evidently/metrics/data_drift/target_by_features_table.py
+++ b/src/evidently/metrics/data_drift/target_by_features_table.py
@@ -167,7 +167,7 @@ def calculate(self, data: InputData) -> TargetByFeaturesTableResults:
[data.get_current_column(x.as_column()) for x in list(self._text_features_gen[col].values())],
axis=1,
)
- curr_text_df.columns = list(self._text_features_gen[col].keys())
+ curr_text_df.columns = pd.Index(list(self._text_features_gen[col].keys()))
curr_df = pd.concat(
[
curr_df.reset_index(drop=True),
@@ -181,7 +181,7 @@ def calculate(self, data: InputData) -> TargetByFeaturesTableResults:
[data.get_reference_column(x.as_column()) for x in list(self._text_features_gen[col].values())],
axis=1,
)
- ref_text_df.columns = list(self._text_features_gen[col].keys())
+ ref_text_df.columns = pd.Index(list(self._text_features_gen[col].keys()))
ref_df = pd.concat(
[
ref_df.reset_index(drop=True),
@@ -221,10 +221,12 @@ def render_html(self, obj: TargetByFeaturesTable) -> List[BaseWidgetInfo]:
current_data = result.current.plot_data
# todo: better typing
assert current_data is not None
- reference_data = result.reference.plot_data if result.reference is not None else None
+ if result.reference is None:
+ raise ValueError("reference is not set but required")
+ reference_data = result.reference.plot_data
target_name = result.target_name
curr_predictions = result.current.predictions
- ref_predictions = result.reference.predictions if result.reference is not None else None
+ ref_predictions = result.reference.predictions
columns = result.columns
task = result.task
if curr_predictions is not None and ref_predictions is not None:
diff --git a/src/evidently/metrics/data_drift/text_descriptors_drift_metric.py b/src/evidently/metrics/data_drift/text_descriptors_drift_metric.py
index 5bd2c38135..78ae9321c3 100644
--- a/src/evidently/metrics/data_drift/text_descriptors_drift_metric.py
+++ b/src/evidently/metrics/data_drift/text_descriptors_drift_metric.py
@@ -20,6 +20,7 @@
from evidently.features.generated_features import GeneratedFeature
from evidently.metric_results import DatasetColumns
from evidently.metric_results import HistogramData
+from evidently.metric_results import ScatterField
from evidently.model.widget import BaseWidgetInfo
from evidently.options.base import AnyOptions
from evidently.options.data_drift import DataDriftOptions
@@ -115,15 +116,18 @@ def calculate(self, data: InputData) -> TextDescriptorsDriftMetricResults:
[data.get_current_column(x.as_column()) for x in list(self.generated_text_features.values())],
axis=1,
)
- curr_text_df.columns = list(self.generated_text_features.keys())
+ curr_text_df.columns = pd.Index(list(self.generated_text_features.keys()))
ref_text_df = pd.concat(
[data.get_reference_column(x.as_column()) for x in list(self.generated_text_features.values())],
axis=1,
)
- ref_text_df.columns = list(self.generated_text_features.keys())
+ ref_text_df.columns = pd.Index(list(self.generated_text_features.keys()))
# text_dataset_columns = DatasetColumns(num_feature_names=curr_text_df.columns)
- text_dataset_columns = process_columns(ref_text_df, ColumnMapping(numerical_features=ref_text_df.columns))
+ text_dataset_columns = process_columns(
+ ref_text_df,
+ ColumnMapping(numerical_features=ref_text_df.columns.tolist()),
+ )
drift_by_columns: Dict[str, ColumnDataDriftMetrics] = {}
for col in curr_text_df.columns:
@@ -173,9 +177,11 @@ def _generate_column_params(
data_drift = "Detected" if data.drift_detected else "Not Detected"
if data.column_type == "num" and data.scatter is not None:
if not agg_data:
+ if not isinstance(data.scatter, ScatterField):
+ raise ValueError(f"TypeMismatch, data.scatter({type(data.scatter)}) expected to be ScatterField ")
scatter_fig = plot_scatter_for_data_drift(
- curr_y=data.scatter.scatter[data.column_name],
- curr_x=data.scatter.scatter[data.scatter.x_name],
+ curr_y=data.scatter.scatter[data.column_name].tolist(),
+ curr_x=data.scatter.scatter[data.scatter.x_name].tolist(),
y0=data.scatter.plot_shape["y0"],
y1=data.scatter.plot_shape["y1"],
y_name=data.column_name,
diff --git a/src/evidently/metrics/data_integrity/column_summary_metric.py b/src/evidently/metrics/data_integrity/column_summary_metric.py
index eaf286be8f..73b9ad8ad1 100644
--- a/src/evidently/metrics/data_integrity/column_summary_metric.py
+++ b/src/evidently/metrics/data_integrity/column_summary_metric.py
@@ -204,20 +204,20 @@ def plot_data(
elif column_type == ColumnType.Datetime:
prefix, freq = choose_agg_period(current_data, reference_data)
curr_data = current_data.dt.to_period(freq=freq).value_counts().reset_index()
- curr_data.columns = ["x", "number_of_items"]
+ curr_data.columns = pd.Index(["x", "number_of_items"])
curr_data["x"] = curr_data["x"].dt.to_timestamp()
reference = None
if reference_data is not None:
ref_data = reference_data.dt.to_period(freq=freq).value_counts().reset_index()
- ref_data.columns = ["x", "number_of_items"]
+ ref_data.columns = pd.Index(["x", "number_of_items"])
ref_data["x"] = ref_data["x"].dt.to_timestamp()
max_ref_date = ref_data["x"].max()
min_curr_date = curr_data["x"].min()
if max_ref_date == min_curr_date:
curr_data, ref_data = _split_periods(curr_data, ref_data, "x")
reference = ref_data
- reference.columns = ["x", "count"]
- curr_data.columns = ["x", "count"]
+ reference.columns = pd.Index(["x", "count"])
+ curr_data.columns = pd.Index(["x", "count"])
data_hist = Histogram(
current=HistogramData.from_df(curr_data),
reference=HistogramData.from_df(reference) if reference is not None else None,
@@ -458,11 +458,11 @@ def calculate(self, data: InputData) -> ColumnSummaryResult:
if column_type in [ColumnType.Categorical, ColumnType.Numerical]:
counts_of_values = {}
current_counts = column_current_data.value_counts(dropna=False).reset_index()
- current_counts.columns = ["x", "count"]
+ current_counts.columns = pd.Index(["x", "count"])
counts_of_values["current"] = current_counts.head(10)
if column_reference_data is not None:
reference_counts = column_reference_data.value_counts(dropna=False).reset_index()
- reference_counts.columns = ["x", "count"]
+ reference_counts.columns = pd.Index(["x", "count"])
counts_of_values["reference"] = reference_counts.head(10)
return ColumnSummaryResult(
@@ -540,9 +540,12 @@ def get_text_stats(
oov = data.get_current_column(generated_text_features["oov"].as_column())
non_letter_char = data.get_current_column(generated_text_features["non_letter_char"].as_column())
else:
- text_length = data.get_reference_column(generated_text_features["text_length"].as_column())
- oov = data.get_reference_column(generated_text_features["oov"].as_column())
- non_letter_char = data.get_reference_column(generated_text_features["non_letter_char"].as_column())
+ text_length_ref = data.get_reference_column(generated_text_features["text_length"].as_column())
+ oov_ref = data.get_reference_column(generated_text_features["oov"].as_column())
+ non_letter_char_ref = data.get_reference_column(generated_text_features["non_letter_char"].as_column())
+ if text_length_ref is None or oov_ref is None or non_letter_char_ref is None:
+ raise ValueError("Reference required but not present in data")
+ (text_length, oov, non_letter_char) = (text_length_ref, oov_ref, non_letter_char_ref)
return TextCharacteristics(
number_of_rows=number_of_rows,
diff --git a/src/evidently/metrics/data_quality/column_category_metric.py b/src/evidently/metrics/data_quality/column_category_metric.py
index 380db52257..fa2af32b7a 100644
--- a/src/evidently/metrics/data_quality/column_category_metric.py
+++ b/src/evidently/metrics/data_quality/column_category_metric.py
@@ -50,7 +50,7 @@ class Config:
}
smart_union = True
- def __init__(self, **data):
+ def __init__(self, **data) -> None:
"""for backward compatibility"""
if "counts_of_values" in data:
counts_of_values: Dict[str, pd.DataFrame] = data.pop("counts_of_values")
@@ -105,12 +105,12 @@ def calculate(self, data: InputData) -> ColumnCategoryMetricResult:
counts_of_values = {}
current_counts = current_column.value_counts(dropna=False).reset_index()
- current_counts.columns = ["x", "count"]
+ current_counts.columns = pd.Index(["x", "count"])
counts_of_values["current"] = current_counts.head(10)
counts_of_values["current"].index = counts_of_values["current"].index.astype("str")
if reference_column is not None:
reference_counts = reference_column.value_counts(dropna=False).reset_index()
- reference_counts.columns = ["x", "count"]
+ reference_counts.columns = pd.Index(["x", "count"])
counts_of_values["reference"] = reference_counts.head(10)
counts_of_values["reference"].index = counts_of_values["reference"].index.astype("str")
diff --git a/src/evidently/metrics/data_quality/column_correlations_metric.py b/src/evidently/metrics/data_quality/column_correlations_metric.py
index 31ce9a9c10..8f8d6fa968 100644
--- a/src/evidently/metrics/data_quality/column_correlations_metric.py
+++ b/src/evidently/metrics/data_quality/column_correlations_metric.py
@@ -110,6 +110,8 @@ def calculate(self, data: InputData) -> ColumnCorrelationsMetricResult:
reference_correlations = None
if reference_data is not None:
+ if data.reference_data is None:
+ raise ValueError("data.reference_data was not set but part of it available")
reference_correlations = self._calculate_correlation(
self.column_name,
reference_data,
diff --git a/src/evidently/metrics/data_quality/column_value_list_metric.py b/src/evidently/metrics/data_quality/column_value_list_metric.py
index 840934b4af..6bba50477d 100644
--- a/src/evidently/metrics/data_quality/column_value_list_metric.py
+++ b/src/evidently/metrics/data_quality/column_value_list_metric.py
@@ -115,14 +115,14 @@ def _calculate_stats(values: list, column: pd.Series) -> ValueListStat:
else:
values_not_in_list[value] = value_counts[value]
- number_in_list = sum(values_in_list.values())
+ number_in_list = sum(values_in_list.values()) # type: ignore[arg-type]
share_in_list = number_in_list / rows_count
number_not_in_list = rows_count - number_in_list
share_not_in_list = number_not_in_list / rows_count
# fill other values from list with zeroes
for value in values:
if value not in values_in_list:
- values_in_list[value] = 0
+ values_in_list[value] = 0 # type: ignore[assignment]
return ValueListStat(
number_in_list=number_in_list,
diff --git a/src/evidently/metrics/data_quality/dataset_correlations_metric.py b/src/evidently/metrics/data_quality/dataset_correlations_metric.py
index ffaa3f5038..c24528baf5 100644
--- a/src/evidently/metrics/data_quality/dataset_correlations_metric.py
+++ b/src/evidently/metrics/data_quality/dataset_correlations_metric.py
@@ -140,8 +140,14 @@ def _get_correlations_stats(correlation: pd.DataFrame, data_definition: DataDefi
# fill diagonal with 1 values for getting abs max values
np.fill_diagonal(correlation_matrix.values, 0)
- if prediction_name in correlation_matrix and target_name in correlation_matrix:
- target_prediction_correlation = correlation_matrix.loc[prediction_name, target_name]
+ target_prediction_correlation: Optional[pd.DataFrame]
+ if (
+ prediction_name is not None
+ and target_name is not None
+ and prediction_name in correlation_matrix
+ and target_name in correlation_matrix
+ ):
+ target_prediction_correlation = correlation_matrix.loc[prediction_name, target_name] # type: ignore[assignment]
if pd.isnull(target_prediction_correlation):
target_prediction_correlation = None
@@ -149,8 +155,10 @@ def _get_correlations_stats(correlation: pd.DataFrame, data_definition: DataDefi
else:
target_prediction_correlation = None
- if target_name in correlation_matrix:
- abs_max_target_features_correlation = correlation_matrix.loc[target_name, columns_corr].abs().max()
+ abs_max_target_features_correlation: Optional[pd.Series]
+ if target_name is not None and target_name in correlation_matrix:
+ cols = [target_name] + columns_corr
+ abs_max_target_features_correlation = correlation_matrix.loc[cols].abs().max().max()
if pd.isnull(abs_max_target_features_correlation):
abs_max_target_features_correlation = None
@@ -158,8 +166,10 @@ def _get_correlations_stats(correlation: pd.DataFrame, data_definition: DataDefi
else:
abs_max_target_features_correlation = None
- if prediction_name in correlation_matrix:
- abs_max_prediction_features_correlation = correlation_matrix.loc[prediction_name, columns_corr].abs().max()
+ abs_max_prediction_features_correlation: Optional[pd.Series]
+ if prediction_name is not None and prediction_name in correlation_matrix:
+ cols = [prediction_name] + columns_corr
+ abs_max_prediction_features_correlation = correlation_matrix.loc[cols].abs().max().max()
if pd.isnull(abs_max_prediction_features_correlation):
abs_max_prediction_features_correlation = None
@@ -261,7 +271,7 @@ def calculate(self, data: InputData) -> DatasetCorrelationsMetricResult:
[data.get_current_column(x.as_column()) for x in list(self._text_features_gen[col].values())],
axis=1,
)
- curr_text_df.columns = list(self._text_features_gen[col].keys())
+ curr_text_df.columns = pd.Index(list(self._text_features_gen[col].keys()))
text_columns.append(list(curr_text_df.columns))
curr_df = pd.concat(
[
@@ -276,7 +286,7 @@ def calculate(self, data: InputData) -> DatasetCorrelationsMetricResult:
[data.get_reference_column(x.as_column()) for x in list(self._text_features_gen[col].values())],
axis=1,
)
- ref_text_df.columns = list(self._text_features_gen[col].keys())
+ ref_text_df.columns = pd.Index(list(self._text_features_gen[col].keys()))
ref_df = pd.concat(
[
ref_df.copy().reset_index(drop=True),
diff --git a/src/evidently/metrics/data_quality/text_descriptors_correlation_metric.py b/src/evidently/metrics/data_quality/text_descriptors_correlation_metric.py
index ce8b7bf785..22ebfa7c58 100644
--- a/src/evidently/metrics/data_quality/text_descriptors_correlation_metric.py
+++ b/src/evidently/metrics/data_quality/text_descriptors_correlation_metric.py
@@ -96,14 +96,14 @@ def calculate(self, data: InputData) -> TextDescriptorsCorrelationMetricResult:
[data.get_current_column(x.as_column()) for x in list(self.generated_text_features.values())],
axis=1,
)
- curr_text_df.columns = list(self.generated_text_features.keys())
+ curr_text_df.columns = pd.Index(list(self.generated_text_features.keys()))
ref_df = None
if data.reference_data is not None:
ref_text_df = pd.concat(
[data.get_reference_column(x.as_column()) for x in list(self.generated_text_features.values())],
axis=1,
)
- ref_text_df.columns = list(self.generated_text_features.keys())
+ ref_text_df.columns = pd.Index(list(self.generated_text_features.keys()))
ref_df = pd.concat(
[
data.reference_data.copy().reset_index(drop=True),
diff --git a/src/evidently/metrics/recsys/ndcg_k.py b/src/evidently/metrics/recsys/ndcg_k.py
index e4d0cc940e..20b9b254c8 100644
--- a/src/evidently/metrics/recsys/ndcg_k.py
+++ b/src/evidently/metrics/recsys/ndcg_k.py
@@ -31,7 +31,7 @@ def __init__(
def calculate(self, data: InputData) -> TopKMetricResult:
curr, ref = get_curr_and_ref_df(data, self.min_rel_score, self.no_feedback_users, False)
current = self.calculate_ndcg(curr, self.k)
- reference: Optional[dict] = None
+ reference: Optional[pd.Series] = None
if ref is not None:
reference = self.calculate_ndcg(ref, self.k)
diff --git a/src/evidently/metrics/recsys/pairwise_distance.py b/src/evidently/metrics/recsys/pairwise_distance.py
index 6b5d354608..1866c56082 100644
--- a/src/evidently/metrics/recsys/pairwise_distance.py
+++ b/src/evidently/metrics/recsys/pairwise_distance.py
@@ -62,11 +62,11 @@ def calculate(self, data: InputData) -> PairwiseDistanceResult:
all_items = all_items[all_items[prediction_name] <= self.k + 1]
all_items = all_items[[item_id.column_name] + self.item_features]
if current_train_data is not None:
- if not np.in1d(self.item_features, current_train_data.columns).all():
+ if not np.in1d(self.item_features, current_train_data.columns).all(): # type: ignore[attr-defined]
raise ValueError("current_train_data must contain item_features.")
all_items = pd.concat([all_items, current_train_data[[item_id.column_name] + self.item_features]])
if reference_train_data is not None:
- if not np.in1d(self.item_features, reference_train_data.columns).all():
+ if not np.in1d(self.item_features, reference_train_data.columns).all(): # type: ignore[attr-defined]
raise ValueError("reference_train_data must contain item_features.")
all_items = pd.concat([all_items, reference_train_data[[item_id.column_name] + self.item_features]])
diff --git a/src/evidently/metrics/recsys/scores_distribution.py b/src/evidently/metrics/recsys/scores_distribution.py
index 90cdc2e267..66a1188216 100644
--- a/src/evidently/metrics/recsys/scores_distribution.py
+++ b/src/evidently/metrics/recsys/scores_distribution.py
@@ -1,5 +1,6 @@
from typing import List
from typing import Optional
+from typing import Tuple
import pandas as pd
from scipy.special import softmax
@@ -57,11 +58,16 @@ def __init__(self, k: int, options: AnyOptions = None) -> None:
self.k = k
super().__init__(options=options)
- def get_distr(self, df, user_id, prediction_name):
+ def get_distr(
+ self,
+ df: pd.DataFrame,
+ user_id: Optional[str],
+ prediction_name: str,
+ ) -> Tuple[Distribution, Optional[Distribution], float]:
df["rank"] = df.groupby(user_id)[prediction_name].transform("rank", ascending=False)
top_k = df.loc[df["rank"] <= self.k, prediction_name]
if self.k == df["rank"].max:
- other: pd.Series = None
+ other: Optional[pd.Series] = None
else:
other = df.loc[df["rank"] > self.k, prediction_name]
top_k_distr, other_distr = get_distribution_for_column(column_type="num", current=top_k, reference=other)
diff --git a/src/evidently/metrics/regression_performance/abs_perc_error_in_time.py b/src/evidently/metrics/regression_performance/abs_perc_error_in_time.py
index 0a3922ec49..117f09bc99 100644
--- a/src/evidently/metrics/regression_performance/abs_perc_error_in_time.py
+++ b/src/evidently/metrics/regression_performance/abs_perc_error_in_time.py
@@ -3,10 +3,12 @@
from typing import Union
import numpy as np
+import pandas as pd
from evidently.base_metric import InputData
from evidently.base_metric import Metric
from evidently.base_metric import UsesRawDataMixin
+from evidently.metric_results import ColumnAggScatter
from evidently.metric_results import ColumnAggScatterResult
from evidently.metric_results import ColumnScatter
from evidently.metric_results import ColumnScatterResult
@@ -53,35 +55,38 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
reference_scatter: Optional[Union[ColumnScatter, dict]] = None
raw_data = self.get_options().render_options.raw_data
if raw_data:
- current_scatter = {}
+ current_scatter: ColumnScatter = {}
current_scatter["Absolute Percentage Error"] = curr_df["Absolute Percentage Error"]
if datetime_column_name is not None:
current_scatter["x"] = curr_df[datetime_column_name]
x_name = "Timestamp"
else:
- current_scatter["x"] = curr_df.index
+ current_scatter["x"] = curr_df.index.to_series()
x_name = "Index"
if ref_df is not None:
reference_scatter = {}
reference_scatter["Absolute Percentage Error"] = ref_df["Absolute Percentage Error"]
- reference_scatter["x"] = ref_df[datetime_column_name] if datetime_column_name else ref_df.index
+ reference_scatter["x"] = (
+ ref_df[datetime_column_name] if datetime_column_name else ref_df.index.to_series()
+ )
return ColumnScatterResult(
current=current_scatter,
reference=reference_scatter,
x_name=x_name,
)
- current_scatter = {}
+ agg_current_scatter: ColumnAggScatter = {}
+ agg_reference_scatter: Optional[ColumnAggScatter] = None
plot_df, prefix = prepare_df_for_time_index_plot(curr_df, "Absolute Percentage Error", datetime_column_name)
- current_scatter["Absolute Percentage Error"] = plot_df
+ agg_current_scatter["Absolute Percentage Error"] = plot_df
x_name_ref: Optional[str] = None
if ref_df is not None:
- reference_scatter = {}
+ agg_reference_scatter = {}
plot_df, prefix_ref = prepare_df_for_time_index_plot(
ref_df, "Absolute Percentage Error", datetime_column_name
)
- reference_scatter["Absolute Percentage Error"] = plot_df
+ agg_reference_scatter["Absolute Percentage Error"] = plot_df
if datetime_column_name is None:
x_name_ref = "Index binned"
else:
@@ -90,18 +95,21 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
x_name = "Index binned"
else:
x_name = datetime_column_name + f" ({prefix})"
- cls = ColumnScatterResult
- if not raw_data:
- cls = ColumnAggScatterResult
- return cls(
- current=current_scatter,
- reference=reference_scatter,
+ return ColumnAggScatterResult(
+ current=agg_current_scatter,
+ reference=agg_reference_scatter,
x_name=x_name,
x_name_ref=x_name_ref,
)
- def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime_column_name: Optional[str]):
+ def _make_df_for_plot(
+ self,
+ df: pd.DataFrame,
+ target_name: str,
+ prediction_name: str,
+ datetime_column_name: Optional[str],
+ ) -> pd.DataFrame:
result = df.replace([np.inf, -np.inf], np.nan)
if datetime_column_name is not None:
result.dropna(
@@ -110,9 +118,11 @@ def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime
inplace=True,
subset=[target_name, prediction_name, datetime_column_name],
)
- return result.sort_values(datetime_column_name)
+ result.sort_values(datetime_column_name, inplace=True)
+ return result
result.dropna(axis=0, how="any", inplace=True, subset=[target_name, prediction_name])
- return result.sort_index()
+ result.sort_index(inplace=True)
+ return result
@default_renderer(wrap_type=RegressionAbsPercentageErrorPlot)
diff --git a/src/evidently/metrics/regression_performance/error_bias_table.py b/src/evidently/metrics/regression_performance/error_bias_table.py
index d8875a026d..1472af241f 100644
--- a/src/evidently/metrics/regression_performance/error_bias_table.py
+++ b/src/evidently/metrics/regression_performance/error_bias_table.py
@@ -182,7 +182,7 @@ def calculate(self, data: InputData) -> RegressionErrorBiasTableResults:
num_feature_names += list(features.keys())
columns += list(features.keys())
curr_text_df = pd.concat([data.get_current_column(x.as_column()) for x in features.values()], axis=1)
- curr_text_df.columns = list(features.keys())
+ curr_text_df.columns = pd.Index(list(features.keys()))
curr_df = pd.concat([curr_df.reset_index(drop=True), curr_text_df.reset_index(drop=True)], axis=1)
if ref_df is not None:
@@ -190,7 +190,7 @@ def calculate(self, data: InputData) -> RegressionErrorBiasTableResults:
[data.get_reference_column(x.as_column()) for x in features.values()],
axis=1,
)
- ref_text_df.columns = list(features.keys())
+ ref_text_df.columns = pd.Index(list(features.keys()))
ref_df = pd.concat([ref_df.reset_index(drop=True), ref_text_df.reset_index(drop=True)], axis=1)
columns_ext = np.union1d(columns, [target_name, prediction_name])
diff --git a/src/evidently/metrics/regression_performance/error_distribution.py b/src/evidently/metrics/regression_performance/error_distribution.py
index 1c3caa0332..dea248c614 100644
--- a/src/evidently/metrics/regression_performance/error_distribution.py
+++ b/src/evidently/metrics/regression_performance/error_distribution.py
@@ -2,6 +2,7 @@
from typing import Optional
import numpy as np
+import pandas as pd
from evidently.base_metric import InputData
from evidently.base_metric import Metric
@@ -56,7 +57,13 @@ def calculate(self, data: InputData) -> RegressionErrorDistributionResults:
return RegressionErrorDistributionResults(current_bins=current_bins, reference_bins=reference_bins)
- def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime_column_name: Optional[str]):
+ def _make_df_for_plot(
+ self,
+ df: pd.DataFrame,
+ target_name: str,
+ prediction_name: str,
+ datetime_column_name: Optional[str],
+ ) -> pd.DataFrame:
result = df.replace([np.inf, -np.inf], np.nan)
if datetime_column_name is not None:
result.dropna(
@@ -65,9 +72,11 @@ def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime
inplace=True,
subset=[target_name, prediction_name, datetime_column_name],
)
- return result.sort_values(datetime_column_name)
+ result.sort_values(datetime_column_name, inplace=True)
+ return result
result.dropna(axis=0, how="any", inplace=True, subset=[target_name, prediction_name])
- return result.sort_index()
+ result.sort_index(inplace=True)
+ return result
@default_renderer(wrap_type=RegressionErrorDistribution)
diff --git a/src/evidently/metrics/regression_performance/error_in_time.py b/src/evidently/metrics/regression_performance/error_in_time.py
index dbea78ce57..50f5dc325e 100644
--- a/src/evidently/metrics/regression_performance/error_in_time.py
+++ b/src/evidently/metrics/regression_performance/error_in_time.py
@@ -1,3 +1,4 @@
+from typing import Dict
from typing import List
from typing import Optional
from typing import Union
@@ -9,7 +10,6 @@
from evidently.base_metric import Metric
from evidently.base_metric import UsesRawDataMixin
from evidently.metric_results import ColumnAggScatterResult
-from evidently.metric_results import ColumnScatter
from evidently.metric_results import ColumnScatterResult
from evidently.model.widget import BaseWidgetInfo
from evidently.options.base import AnyOptions
@@ -46,8 +46,8 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
if ref_df is not None:
ref_df = self._make_df_for_plot(ref_df.copy(), target_name, prediction_name, datetime_column_name)
ref_error = ref_df[prediction_name] - ref_df[target_name]
- current_scatter = {}
- reference_scatter: Optional[Union[dict, ColumnScatter]] = None
+ current_scatter: Dict[str, Union[pd.Series, pd.DataFrame]] = {}
+ reference_scatter: Optional[Dict[str, Union[pd.Series, pd.DataFrame]]] = None
raw_data = self.get_options().render_options.raw_data
if raw_data:
current_scatter["Predicted - Actual"] = curr_error
@@ -55,13 +55,17 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
current_scatter["x"] = curr_df[datetime_column_name]
x_name = "Timestamp"
else:
- current_scatter["x"] = curr_df.index
+ current_scatter["x"] = curr_df.index.to_series()
x_name = "Index"
if ref_df is not None:
+ if ref_error is None:
+ raise ValueError("ref_error is None but required")
reference_scatter = {}
reference_scatter["Predicted - Actual"] = ref_error
- reference_scatter["x"] = ref_df[datetime_column_name] if datetime_column_name else ref_df.index
+ reference_scatter["x"] = (
+ ref_df[datetime_column_name] if datetime_column_name else ref_df.index.to_series()
+ )
return ColumnScatterResult(
current=current_scatter,
@@ -96,7 +100,13 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
x_name_ref=x_name_ref,
)
- def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime_column_name: Optional[str]):
+ def _make_df_for_plot(
+ self,
+ df: pd.DataFrame,
+ target_name: str,
+ prediction_name: str,
+ datetime_column_name: Optional[str],
+ ) -> pd.DataFrame:
result = df.replace([np.inf, -np.inf], np.nan)
if datetime_column_name is not None:
result.dropna(
@@ -105,9 +115,11 @@ def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime
inplace=True,
subset=[target_name, prediction_name, datetime_column_name],
)
- return result.sort_values(datetime_column_name)
+ result.sort_values(datetime_column_name, inplace=True)
+ return result
result.dropna(axis=0, how="any", inplace=True, subset=[target_name, prediction_name])
- return result.sort_index()
+ result.sort_index(inplace=True)
+ return result
@default_renderer(wrap_type=RegressionErrorPlot)
diff --git a/src/evidently/metrics/regression_performance/error_normality.py b/src/evidently/metrics/regression_performance/error_normality.py
index cd2a713624..199086461a 100644
--- a/src/evidently/metrics/regression_performance/error_normality.py
+++ b/src/evidently/metrics/regression_performance/error_normality.py
@@ -82,7 +82,13 @@ def calculate(self, data: InputData) -> RegressionErrorNormalityResults:
reference_theoretical=reference_theoretical,
)
- def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime_column_name: Optional[str]):
+ def _make_df_for_plot(
+ self,
+ df: pd.DataFrame,
+ target_name: str,
+ prediction_name: str,
+ datetime_column_name: Optional[str],
+ ) -> pd.DataFrame:
result = df.replace([np.inf, -np.inf], np.nan)
if datetime_column_name is not None:
result.dropna(
@@ -91,9 +97,11 @@ def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime
inplace=True,
subset=[target_name, prediction_name, datetime_column_name],
)
- return result.sort_values(datetime_column_name)
+ result.sort_values(datetime_column_name, inplace=True)
+ return result
result.dropna(axis=0, how="any", inplace=True, subset=[target_name, prediction_name])
- return result.sort_index()
+ result.sort_index(inplace=True)
+ return result
def _get_theoretical_line(self, res: Any):
x = [res[0][0][0], res[0][0][-1]]
@@ -104,7 +112,7 @@ def _get_plot_data(self, res: Any, err_data: pd.Series, agg_data: bool):
df = pd.DataFrame({"x": res[0][0], "y": res[0][1]})
if not agg_data:
return df
- df["bin"] = pd.cut(err_data.sort_values().values, bins=10, labels=False, retbins=False)
+ df["bin"] = pd.cut(err_data.sort_values().to_numpy(), bins=10, labels=False, retbins=False)
return (
df.groupby("bin", group_keys=False)
.apply(lambda x: x.sample(n=min(100, x.shape[0]), random_state=0))
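
For the aggregated error-normality plot, the ordered errors are cut into ten equal-width bins and at most 100 points are kept per bin, which caps the plot size on large datasets. A standalone sketch of that downsampling step on synthetic errors:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
err_data = pd.Series(rng.normal(size=10_000))
# Stand-in for the Q-Q plot coordinates computed from the errors.
df = pd.DataFrame({"x": np.sort(err_data), "y": np.sort(err_data)})

df["bin"] = pd.cut(err_data.sort_values().to_numpy(), bins=10, labels=False, retbins=False)
sampled = (
    df.groupby("bin", group_keys=False)
    .apply(lambda x: x.sample(n=min(100, x.shape[0]), random_state=0))
    .sort_values("x")
)
print(len(sampled))  # at most 10 bins x 100 points
```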
diff --git a/src/evidently/metrics/regression_performance/predicted_and_actual_in_time.py b/src/evidently/metrics/regression_performance/predicted_and_actual_in_time.py
index d3772f3fda..03679aacd4 100644
--- a/src/evidently/metrics/regression_performance/predicted_and_actual_in_time.py
+++ b/src/evidently/metrics/regression_performance/predicted_and_actual_in_time.py
@@ -52,28 +52,35 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
current_scatter["x"] = curr_df[datetime_column_name]
x_name = "Timestamp"
else:
- current_scatter["x"] = curr_df.index
+ current_scatter["x"] = curr_df.index.to_series()
x_name = "Index"
if ref_df is not None:
reference_scatter = {}
reference_scatter["Predicted"] = ref_df[prediction_name]
reference_scatter["Actual"] = ref_df[target_name]
- reference_scatter["x"] = ref_df[datetime_column_name] if datetime_column_name else ref_df.index
+ reference_scatter["x"] = (
+ ref_df[datetime_column_name] if datetime_column_name else ref_df.index.to_series()
+ )
return ColumnScatterResult(
current=current_scatter,
reference=reference_scatter,
x_name=x_name,
)
- current_scatter = {}
+ agg_current_scatter = {}
+ agg_reference_scatter = None
plot_df, prefix = prepare_df_for_time_index_plot(curr_df, prediction_name, datetime_column_name)
- current_scatter["Predicted"] = plot_df
- current_scatter["Actual"], _ = prepare_df_for_time_index_plot(curr_df, target_name, datetime_column_name)
+ agg_current_scatter["Predicted"] = plot_df
+ agg_current_scatter["Actual"], _ = prepare_df_for_time_index_plot(curr_df, target_name, datetime_column_name)
x_name_ref: Optional[str] = None
if ref_df is not None:
- reference_scatter = {}
+ agg_reference_scatter = {}
plot_df, prefix_ref = prepare_df_for_time_index_plot(ref_df, prediction_name, datetime_column_name)
- reference_scatter["Predicted"] = plot_df
- reference_scatter["Actual"], _ = prepare_df_for_time_index_plot(ref_df, target_name, datetime_column_name)
+ agg_reference_scatter["Predicted"] = plot_df
+ agg_reference_scatter["Actual"], _ = prepare_df_for_time_index_plot(
+ ref_df,
+ target_name,
+ datetime_column_name,
+ )
if datetime_column_name is None:
x_name_ref = "Index binned"
else:
@@ -82,11 +89,8 @@ def calculate(self, data: InputData) -> ColumnScatterResult:
x_name = "Index binned"
else:
x_name = datetime_column_name + f" ({prefix})"
- cls = ColumnScatterResult
- if not raw_data:
- cls = ColumnAggScatterResult
- return cls(
- current=current_scatter,
+ return ColumnAggScatterResult(
+ current=agg_current_scatter,
-            reference=reference_scatter,
+            reference=agg_reference_scatter,
x_name=x_name,
x_name_ref=x_name_ref,
diff --git a/src/evidently/metrics/regression_performance/regression_dummy_metric.py b/src/evidently/metrics/regression_performance/regression_dummy_metric.py
index e7355e37e9..781a581f4c 100644
--- a/src/evidently/metrics/regression_performance/regression_dummy_metric.py
+++ b/src/evidently/metrics/regression_performance/regression_dummy_metric.py
@@ -84,8 +84,8 @@ def calculate(self, data: InputData) -> RegressionDummyMetricResults:
# mape default values
# optimal constant for mape
s = data.current_data[target_name]
- inv_y = 1 / s[s != 0].values
- w = inv_y / sum(inv_y)
+ inv_y = 1.0 / s[s != 0].values # type: ignore[operator]
+ w = inv_y / sum(inv_y) # type: ignore[operator,arg-type]
idxs = np.argsort(w)
sorted_w = w[idxs]
sorted_w_cumsum = np.cumsum(sorted_w)
@@ -126,8 +126,8 @@ def calculate(self, data: InputData) -> RegressionDummyMetricResults:
# mape default values
# optimal constant for mape
s = data.reference_data[target_name]
- inv_y = 1 / s[s != 0].values
- w = inv_y / sum(inv_y)
+ inv_y = 1.0 / s[s != 0].values # type: ignore[operator]
+ w = inv_y / sum(inv_y) # type: ignore[operator,arg-type]
idxs = np.argsort(w)
sorted_w = w[idxs]
sorted_w_cumsum = np.cumsum(sorted_w)
@@ -210,5 +210,9 @@ def render_html(self, obj: RegressionDummyMetric) -> List[BaseWidgetInfo]:
return [
header_text(label="Dummy Regression Quality"),
- table_data(column_names=columns, data=np.around(in_table_data, 3).values, title=""),
+ table_data(
+ column_names=columns,
+ data=np.around(in_table_data, 3).values, # type: ignore[attr-defined]
+ title="",
+ ),
]
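
The `# optimal constant for mape` blocks prepare normalized `1/|y|` weights; the hunks do not show the selection step, but the underlying idea is a weighted median of the target, which minimises MAPE for a constant prediction. A hedged, self-contained sketch (the selection shown is a standard continuation, not a verbatim copy of the library code):

```python
import numpy as np

y = np.array([1.0, 2.0, 4.0, 100.0])
s = y[y != 0]
inv_y = 1.0 / np.abs(s)
w = inv_y / inv_y.sum()           # weights proportional to 1 / |y|

order = np.argsort(s)             # sort the values, carrying their weights along
cum_w = np.cumsum(w[order])
optimal_const = s[order][np.searchsorted(cum_w, 0.5)]  # weighted median


def mape(c: float) -> float:
    return float(np.mean(np.abs(s - c) / np.abs(s)))


assert mape(optimal_const) <= mape(float(s.mean()))
```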
diff --git a/src/evidently/metrics/regression_performance/regression_performance_metrics.py b/src/evidently/metrics/regression_performance/regression_performance_metrics.py
index 0a33bcdf3e..5f6b2d7c70 100644
--- a/src/evidently/metrics/regression_performance/regression_performance_metrics.py
+++ b/src/evidently/metrics/regression_performance/regression_performance_metrics.py
@@ -152,8 +152,9 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
# mape default values
# optimal constant for mape
s = data.current_data[data.column_mapping.target]
- inv_y = 1 / s[s != 0].values
- w = inv_y / sum(inv_y)
+ # TODO: Fix assignments
+ inv_y = 1 / s[s != 0].values # type: ignore[operator]
+ w = inv_y / sum(inv_y) # type: ignore[operator,arg-type]
idxs = np.argsort(w)
sorted_w = w[idxs]
sorted_w_cumsum = np.cumsum(sorted_w)
@@ -206,12 +207,6 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
vals_for_plots: Dict[str, RegressionMetricScatter] = {}
- if data.reference_data is not None:
- is_ref_data = True
-
- else:
- is_ref_data = False
-
for name, func in zip(
["r2_score", "rmse", "mean_abs_error", "mean_abs_perc_error"],
[
@@ -226,14 +221,14 @@ def calculate(self, data: InputData) -> RegressionPerformanceMetricsResults:
func,
data.column_mapping.target,
data.column_mapping.prediction,
- is_ref_data,
+ data.reference_data is not None,
)
# me plot
err_curr = data.current_data[data.column_mapping.prediction] - data.current_data[data.column_mapping.target]
err_ref = None
- if is_ref_data:
+ if data.reference_data is not None:
err_ref = (
data.reference_data[data.column_mapping.prediction] - data.reference_data[data.column_mapping.target]
)
diff --git a/src/evidently/metrics/regression_performance/regression_quality.py b/src/evidently/metrics/regression_performance/regression_quality.py
index 992d80973c..7c36a8c7a2 100644
--- a/src/evidently/metrics/regression_performance/regression_quality.py
+++ b/src/evidently/metrics/regression_performance/regression_quality.py
@@ -145,8 +145,9 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
# mape default values
# optimal constant for mape
s = data.current_data[target_name]
- inv_y = 1 / s[s != 0].values
- w = inv_y / sum(inv_y)
+ # TODO: fix typing
+ inv_y = 1 / s[s != 0].values # type: ignore[operator]
+ w = inv_y / sum(inv_y) # type: ignore[operator,arg-type]
idxs = np.argsort(w)
sorted_w = w[idxs]
sorted_w_cumsum = np.cumsum(sorted_w)
@@ -219,7 +220,7 @@ def calculate(self, data: InputData) -> RegressionQualityMetricResults:
err_curr = data.current_data[prediction_name] - data.current_data[target_name]
err_ref = None
- if is_ref_data:
+ if data.reference_data is not None:
err_ref = data.reference_data[prediction_name] - data.reference_data[target_name]
me_hist_for_plot = make_hist_for_num_plot(err_curr, err_ref)
diff --git a/src/evidently/metrics/regression_performance/top_error.py b/src/evidently/metrics/regression_performance/top_error.py
index 243ef1c566..cfeeae05d7 100644
--- a/src/evidently/metrics/regression_performance/top_error.py
+++ b/src/evidently/metrics/regression_performance/top_error.py
@@ -130,7 +130,13 @@ def calculate(self, data: InputData) -> RegressionTopErrorMetricResults:
agg_data=True,
)
- def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime_column_name: Optional[str]):
+ def _make_df_for_plot(
+ self,
+ df: pd.DataFrame,
+ target_name: str,
+ prediction_name: str,
+ datetime_column_name: Optional[str],
+ ) -> pd.DataFrame:
result = df.replace([np.inf, -np.inf], np.nan)
if datetime_column_name is not None:
result.dropna(
@@ -139,9 +145,11 @@ def _make_df_for_plot(self, df, target_name: str, prediction_name: str, datetime
inplace=True,
subset=[target_name, prediction_name, datetime_column_name],
)
- return result.sort_values(datetime_column_name)
+ result.sort_values(datetime_column_name, inplace=True)
+ return result
result.dropna(axis=0, how="any", inplace=True, subset=[target_name, prediction_name])
- return result.sort_index()
+ result.sort_index(inplace=True)
+ return result
@staticmethod
def _get_data_for_scatter(df: pd.DataFrame, target_name: str, prediction_name: str) -> RegressionScatter:
diff --git a/src/evidently/options/base.py b/src/evidently/options/base.py
index a31787f075..c3c3e1cce4 100644
--- a/src/evidently/options/base.py
+++ b/src/evidently/options/base.py
@@ -109,8 +109,10 @@ def dict(
exclude = {"custom"}
elif isinstance(exclude, set):
exclude.add("custom")
- else:
+ elif isinstance(exclude, dict):
exclude["custom"] = False
+ else:
+ raise TypeError("exclude must be either a dict or a set")
return super().dict(
include=include,
exclude=exclude,
diff --git a/src/evidently/pydantic_utils.py b/src/evidently/pydantic_utils.py
index 983330107d..971935959a 100644
--- a/src/evidently/pydantic_utils.py
+++ b/src/evidently/pydantic_utils.py
@@ -28,6 +28,7 @@
from typing_inspect import is_union_type
from evidently._pydantic_compat import SHAPE_DICT
+from evidently._pydantic_compat import BaseConfig
from evidently._pydantic_compat import BaseModel
from evidently._pydantic_compat import Field
from evidently._pydantic_compat import ModelMetaclass
@@ -189,7 +190,7 @@ def is_not_abstract(cls):
class PolymorphicModel(BaseModel):
- class Config(BaseModel.Config):
+ class Config(BaseConfig):
# value to put into "type" field
type_alias: ClassVar[Optional[str]] = None
# flag to mark alias required. If not required, classpath is used by default
@@ -200,7 +201,7 @@ class Config(BaseModel.Config):
# flag to mark type as base. This means it will be possible to parse all subclasses of it as this type
is_base_type: ClassVar[bool] = False
- __config__: ClassVar[Config]
+ __config__: ClassVar[Type[Config]] = Config
@classmethod
def __get_type__(cls):
@@ -272,7 +273,7 @@ def validate(cls: Type[TPM], value: Any) -> TPM:
subcls = import_string(classpath)
except ImportError as e:
raise ValueError(f"Error importing subclass from '{classpath}'") from e
- return subcls.validate(value)
+ return subcls.validate(value) # type: ignore[return-value]
return super().validate(value) # type: ignore[misc]
diff --git a/src/evidently/renderers/base_renderer.py b/src/evidently/renderers/base_renderer.py
index ff7237092c..89901fdc89 100644
--- a/src/evidently/renderers/base_renderer.py
+++ b/src/evidently/renderers/base_renderer.py
@@ -1,9 +1,12 @@
import dataclasses
import warnings
from typing import TYPE_CHECKING
+from typing import Any
from typing import Dict
+from typing import Generic
from typing import List
from typing import Optional
+from typing import TypeVar
from typing import Union
import pandas as pd
@@ -16,7 +19,6 @@
if TYPE_CHECKING:
from evidently.base_metric import Metric
- from evidently.base_metric import TResult
from evidently.core import IncludeOptions
from evidently.tests.base_test import Test
@@ -34,8 +36,11 @@ def __init__(self, color_options: Optional[ColorOptions] = None) -> None:
self.color_options = color_options
-class MetricRenderer(BaseRenderer):
- def render_pandas(self, obj: "Metric[TResult]") -> pd.DataFrame:
+TMetric = TypeVar("TMetric", bound="Metric")
+
+
+class MetricRenderer(Generic[TMetric], BaseRenderer):
+ def render_pandas(self, obj: TMetric) -> pd.DataFrame:
result = obj.get_result()
if not result.__config__.pd_include:
warnings.warn(
@@ -46,7 +51,7 @@ def render_pandas(self, obj: "Metric[TResult]") -> pd.DataFrame:
def render_json(
self,
- obj: "Metric[TResult]",
+ obj: TMetric,
include_render: bool = False,
include: "IncludeOptions" = None,
exclude: "IncludeOptions" = None,
@@ -54,14 +59,14 @@ def render_json(
result = obj.get_result()
return result.get_dict(include_render=include_render, include=include, exclude=exclude)
- def render_html(self, obj) -> List[BaseWidgetInfo]:
+ def render_html(self, obj: TMetric) -> List[BaseWidgetInfo]:
raise NotImplementedError()
@dataclasses.dataclass
class DetailsInfo:
title: str
- info: BaseWidgetInfo
+ info: Union[BaseWidgetInfo, Any]
id: str = dataclasses.field(default_factory=lambda: str(uuid6.uuid7()))
@@ -78,14 +83,17 @@ def with_details(self, title: str, info: BaseWidgetInfo):
return self
-class TestRenderer(BaseRenderer):
- def html_description(self, obj: "Test"):
+TTest = TypeVar("TTest", bound="Test")
+
+
+class TestRenderer(Generic[TTest], BaseRenderer):
+ def html_description(self, obj: TTest):
return obj.get_result().description
- def json_description(self, obj: "Test"):
+ def json_description(self, obj: TTest):
return obj.get_result().description
- def render_html(self, obj: "Test") -> TestHtmlInfo:
+ def render_html(self, obj: TTest) -> TestHtmlInfo:
result = obj.get_result()
return TestHtmlInfo(
name=result.name,
@@ -97,7 +105,7 @@ def render_html(self, obj: "Test") -> TestHtmlInfo:
def render_json(
self,
- obj: "Test",
+ obj: TTest,
include_render: bool = False,
include: "IncludeOptions" = None,
exclude: "IncludeOptions" = None,
diff --git a/src/evidently/renderers/html_widgets.py b/src/evidently/renderers/html_widgets.py
index 36f632e772..f8fb79e261 100644
--- a/src/evidently/renderers/html_widgets.py
+++ b/src/evidently/renderers/html_widgets.py
@@ -613,7 +613,7 @@ def get_heatmaps_widget(
# show values if thw heatmap is small
if len(columns) < 15:
- heatmap_text = np.round(data, 2).astype(str)
+ heatmap_text: Optional[pd.DataFrame] = np.round(data, 2).astype(str) # type: ignore[assignment]
heatmap_text_template: Optional[str] = "%{text}"
else:
@@ -731,7 +731,7 @@ def get_pr_rec_plot_data(
def get_lift_plot_data(
current_lift_curve: LiftCurve,
- reference_lift_curve: Optional[PRCurve],
+ reference_lift_curve: Optional[LiftCurve],
color_options: ColorOptions,
) -> List[Tuple[str, BaseWidgetInfo]]:
"""
diff --git a/src/evidently/report/report.py b/src/evidently/report/report.py
index 327e8e60e7..e5c55721e3 100644
--- a/src/evidently/report/report.py
+++ b/src/evidently/report/report.py
@@ -6,6 +6,7 @@
from typing import Dict
from typing import List
from typing import Optional
+from typing import Tuple
from typing import Type
from typing import Union
@@ -217,7 +218,7 @@ def as_dataframe(self, group: str = None) -> Union[Dict[str, pd.DataFrame], pd.D
raise ValueError(f"Metric group {group} not found in this report")
return result[group]
- def _build_dashboard_info(self):
+ def _build_dashboard_info(self) -> Tuple[str, DashboardInfo, Dict[str, dict]]:
metrics_results: List[BaseWidgetInfo] = []
additional_graphs = []
@@ -237,7 +238,6 @@ def _build_dashboard_info(self):
for additional_graph in info_item.get_additional_graphs():
if isinstance(additional_graph, AdditionalGraphInfo):
additional_graphs.append(DetailsInfo("", additional_graph.params, additional_graph.id))
-
else:
additional_graphs.append(DetailsInfo("", additional_graph, additional_graph.id))
diff --git a/src/evidently/runner/loader.py b/src/evidently/runner/loader.py
index 50ca1a737e..82ea0d9868 100644
--- a/src/evidently/runner/loader.py
+++ b/src/evidently/runner/loader.py
@@ -64,7 +64,7 @@ def __init__(self):
def load(self, filename: str, data_options: DataOptions, sampling_options: SamplingOptions = None):
sampling_opts = SamplingOptions("none", 0, 0) if sampling_options is None else sampling_options
parse_dates = [data_options.date_column] if data_options.date_column else False
- return pd.read_csv(
+ return pd.read_csv( # type: ignore[call-overload]
filename,
header=0 if data_options.header else None,
sep=data_options.separator,
diff --git a/src/evidently/spark/calculations/histogram.py b/src/evidently/spark/calculations/histogram.py
index cfad2b1acb..cc9a70cf13 100644
--- a/src/evidently/spark/calculations/histogram.py
+++ b/src/evidently/spark/calculations/histogram.py
@@ -39,7 +39,7 @@ def get_histogram(
bin_edges = np.array([min_val + step * i for i in range(nbinsx + 1)])
if density:
- db = np.array(np.diff(bin_edges), float)
+ db: np.ndarray = np.array(np.diff(bin_edges), float)
return (n / db / n.sum()).tolist(), bin_edges
diff --git a/src/evidently/spark/visualizations.py b/src/evidently/spark/visualizations.py
index a8d283ed49..05043a11d9 100644
--- a/src/evidently/spark/visualizations.py
+++ b/src/evidently/spark/visualizations.py
@@ -57,13 +57,13 @@ def prepare_df_for_time_index_plot(
sf.floor(sf.col("_2") / (ptp / (OPTIMAL_POINTS - 1))).alias(PERIOD_COL),
)
)
- plot_df = (
+ plot_df_pandas = (
plot_df.groupby(PERIOD_COL)
.agg(sf.mean(column_name).alias("mean"), sf.stddev(column_name).alias("std"))
.toPandas()
.sort_values(PERIOD_COL) # type: ignore[attr-defined]
)
- return plot_df, None
+ return plot_df_pandas, None
def choose_agg_period(
diff --git a/src/evidently/suite/base_suite.py b/src/evidently/suite/base_suite.py
index 2e109afeaf..fa6225255d 100644
--- a/src/evidently/suite/base_suite.py
+++ b/src/evidently/suite/base_suite.py
@@ -7,7 +7,7 @@
from typing import IO
from typing import Any
from typing import Dict
-from typing import Iterator
+from typing import Generator
from typing import List
from typing import Optional
from typing import Tuple
@@ -90,7 +90,7 @@ def find_metric_renderer(obj, renderers: RenderersDefinitions) -> MetricRenderer
raise KeyError(f"No renderer found for {obj}")
-def _discover_dependencies(test: Union[Metric, Test]) -> Iterator[Tuple[str, Union[Metric, Test]]]:
+def _discover_dependencies(test: Union[Metric, Test]) -> Generator[Tuple[str, Union[Metric, Test]], None, None]:
if hasattr(test, "__evidently_dependencies__"):
yield from test.__evidently_dependencies__() # type: ignore[union-attr]
return
@@ -480,7 +480,7 @@ class DatasetLinks(BaseModel):
current: Optional[DatasetID] = None
additional: Dict[str, DatasetID] = {}
- def __iter__(self) -> Iterator[Tuple[str, DatasetID]]:
+ def __iter__(self) -> Generator[Tuple[str, DatasetID], None, None]:
if self.reference is not None:
yield "reference", self.reference
if self.current is not None:
@@ -492,9 +492,9 @@ class DatasetInputOutputLinks(BaseModel):
input: DatasetLinks = DatasetLinks()
output: DatasetLinks = DatasetLinks()
- def __iter__(self) -> Iterator[Tuple[str, str, DatasetID]]:
- yield from (("input", subtype, dataset_id) for subtype, dataset_id in self.input)
- yield from (("output", subtype, dataset_id) for subtype, dataset_id in self.output)
+ def __iter__(self) -> Generator[Tuple[str, Tuple[str, DatasetID]], None, None]:
+ yield from (("input", (subtype, dataset_id)) for subtype, dataset_id in self.input)
+ yield from (("output", (subtype, dataset_id)) for subtype, dataset_id in self.output)
class SnapshotLinks(BaseModel):
diff --git a/src/evidently/test_preset/classification_multiclass.py b/src/evidently/test_preset/classification_multiclass.py
index 85d9a26410..55fecde560 100644
--- a/src/evidently/test_preset/classification_multiclass.py
+++ b/src/evidently/test_preset/classification_multiclass.py
@@ -70,8 +70,8 @@ def generate_tests(
tests: List[AnyTest] = [
TestAccuracyScore(),
TestF1Score(),
- *[TestPrecisionByClass(label) for label in labels],
- *[TestRecallByClass(label) for label in labels],
+ *[TestPrecisionByClass(label=label) for label in labels],
+ *[TestRecallByClass(label=label) for label in labels],
TestNumberOfRows(),
TestColumnDrift(
column_name=target.column_name,
diff --git a/src/evidently/test_preset/data_drift.py b/src/evidently/test_preset/data_drift.py
index dfec627696..29373e9166 100644
--- a/src/evidently/test_preset/data_drift.py
+++ b/src/evidently/test_preset/data_drift.py
@@ -85,21 +85,22 @@ def generate_tests(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyTest]:
embeddings_data = data_definition.embeddings
+ columns = self.columns
if embeddings_data is not None:
embs = list(set(v for values in embeddings_data.values() for v in values))
- if self.columns is None:
- self.columns = list(
+ if columns is None:
+ columns = list(
np.setdiff1d(
[column.column_name for column in data_definition.get_columns(features_only=True)],
embs,
)
)
else:
- self.columns = list(np.setdiff1d(self.columns, embs))
+ columns = list(np.setdiff1d(columns, embs))
preset_tests: list = [
TestShareOfDriftedColumns(
- columns=self.columns,
+ columns=columns,
lt=0.3 if self.drift_share is None else self.drift_share,
stattest=self.stattest,
cat_stattest=self.cat_stattest,
@@ -164,7 +165,7 @@ def generate_tests(
preset_tests.append(
TestAllFeaturesValueDrift(
- self.columns,
+ columns,
self.stattest,
self.cat_stattest,
self.num_stattest,
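
Filtering embedding columns into a local `columns` variable (here and in the no-target preset below) keeps the preset instance itself unchanged, so reusing it does not carry state over from a previous run. A hedged illustration with example column names:

```python
from evidently.test_preset import DataDriftTestPreset
from evidently.test_suite import TestSuite

preset = DataDriftTestPreset(columns=["feature_a", "feature_b"])

suite_1 = TestSuite(tests=[preset])
suite_2 = TestSuite(tests=[preset])
# Embedding columns are now excluded inside generate_tests() only;
# preset.columns is still ["feature_a", "feature_b"] for both suites.
```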
diff --git a/src/evidently/test_preset/no_target_performance.py b/src/evidently/test_preset/no_target_performance.py
index 92cc6888a1..96bccd0fc8 100644
--- a/src/evidently/test_preset/no_target_performance.py
+++ b/src/evidently/test_preset/no_target_performance.py
@@ -97,17 +97,18 @@ def generate_tests(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyTest]:
embeddings_data = data_definition.embeddings
+ columns = self.columns
if embeddings_data is not None:
embs = list(set(v for values in embeddings_data.values() for v in values))
- if self.columns is None:
- self.columns = list(
+ if columns is None:
+ columns = list(
np.setdiff1d(
[column.column_name for column in data_definition.get_columns(features_only=True)],
embs,
)
)
else:
- self.columns = list(np.setdiff1d(self.columns, embs))
+ columns = list(np.setdiff1d(columns, embs))
preset_tests: List = []
@@ -150,10 +151,10 @@ def generate_tests(
)
)
preset_tests.append(TestColumnsType())
- preset_tests.append(TestAllColumnsShareOfMissingValues(columns=self.columns))
- preset_tests.append(TestNumColumnsOutOfRangeValues(columns=self.columns))
- preset_tests.append(TestCatColumnsOutOfListValues(columns=self.columns))
- preset_tests.append(TestNumColumnsMeanInNSigmas(columns=self.columns))
+ preset_tests.append(TestAllColumnsShareOfMissingValues(columns=columns))
+ preset_tests.append(TestNumColumnsOutOfRangeValues(columns=columns))
+ preset_tests.append(TestCatColumnsOutOfListValues(columns=columns))
+ preset_tests.append(TestNumColumnsMeanInNSigmas(columns=columns))
if embeddings_data is None:
return preset_tests
diff --git a/src/evidently/tests/data_integrity_tests.py b/src/evidently/tests/data_integrity_tests.py
index 301785d9c5..8c2e503528 100644
--- a/src/evidently/tests/data_integrity_tests.py
+++ b/src/evidently/tests/data_integrity_tests.py
@@ -7,7 +7,7 @@
import numpy as np
import pandas as pd
-from pandas.core.dtypes.common import infer_dtype_from_object
+from pandas.core.dtypes.common import infer_dtype_from_object # type: ignore[attr-defined]
from evidently.base_metric import ColumnName
from evidently.metrics import ColumnRegExpMetric
@@ -1146,7 +1146,7 @@ def render_html(self, obj: TestColumnRegExp) -> TestHtmlInfo:
if metric_result.current.table_of_not_matched:
curr_df = pd.DataFrame(metric_result.current.table_of_not_matched.items())
- curr_df.columns = ["x", "count"]
+ curr_df.columns = pd.Index(["x", "count"])
else:
curr_df = pd.DataFrame(columns=["x", "count"])
@@ -1155,7 +1155,7 @@ def render_html(self, obj: TestColumnRegExp) -> TestHtmlInfo:
if metric_result.reference is not None and metric_result.reference.table_of_not_matched:
ref_df = pd.DataFrame(metric_result.reference.table_of_not_matched.items())
- ref_df.columns = ["x", "count"]
+ ref_df.columns = pd.Index(["x", "count"])
additional_plots = plot_value_counts_tables_ref_curr(
column_name, curr_df, ref_df, f"{column_name}_ColumnValueRegExp"
diff --git a/src/evidently/tests/data_quality_tests.py b/src/evidently/tests/data_quality_tests.py
index a57eb7c127..7b39feb5e6 100644
--- a/src/evidently/tests/data_quality_tests.py
+++ b/src/evidently/tests/data_quality_tests.py
@@ -388,7 +388,7 @@ def __init__(
)
def get_condition_from_reference(self, reference: Optional[DatasetCorrelation]) -> TestValueCondition:
- pass
+ raise NotImplementedError()
def get_condition(self) -> TestValueCondition:
if self.condition.has_condition():
@@ -411,9 +411,10 @@ def calculate_value_for_test(self) -> Optional[Numeric]:
return (diff.abs() > self.corr_diff).sum().sum() / 2
current_correlations = current_correlations[self.column_name.display_name]
- if reference_correlations is not None:
- reference_correlations = reference_correlations[self.column_name.display_name]
- diff = reference_correlations - current_correlations
+ if reference_correlations is None:
+ raise ValueError("Reference is required for test")
+ reference_correlations_data = reference_correlations[self.column_name.display_name]
+ diff = reference_correlations_data - current_correlations
return (diff.abs() > self.corr_diff).sum()
def get_description(self, value: Numeric) -> str:
@@ -629,7 +630,7 @@ def render_html(self, obj: BaseFeatureDataQualityMetricsTest) -> TestHtmlInfo:
info.with_details(f"{obj.name} {column_name}", plotly_figure(title="", figure=fig))
return info
- def _feature_render_html(self, obj):
+ def _feature_render_html(self, obj: BaseFeatureDataQualityMetricsTest):
column_name = obj.column_name
info = super().render_html(obj)
metric_result: ColumnSummaryResult = obj.metric.get_result()
@@ -1708,7 +1709,7 @@ def get_parameters(self) -> CheckValueParameters:
@default_renderer(wrap_type=TestCategoryShare)
class TestCategoryRenderer(TestRenderer):
@staticmethod
- def _get_number_and_percents(s: pd.Series, num: int) -> pd.DataFrame:
+ def _get_number_and_percents(s: pd.Series, num: int) -> pd.Series:
"""Get a string with missing values numbers and percents from info for results table"""
return s.astype(str) + " (" + (s / num * 100).round(2).astype(str) + "%)"
@@ -1727,7 +1728,7 @@ def get_value_counts_table_with_percents(
ref_df = ref_df.copy()
replace.append(("reference value counts", n_ref))
df = curr_df.merge(ref_df, on="x", how="outer")
- df.columns = ["value", "current value counts", "reference value counts"]
+ df.columns = pd.Index(["value", "current value counts", "reference value counts"])
df[["current value counts", "reference value counts"]] = df[
["current value counts", "reference value counts"]
].fillna(0.0)
@@ -1735,7 +1736,7 @@ def get_value_counts_table_with_percents(
else:
df = curr_df
- df.columns = ["value", "current value counts"]
+ df.columns = pd.Index(["value", "current value counts"])
df.sort_values("current value counts", ascending=False, inplace=True)
for col, n in replace:
df[col] = self._get_number_and_percents(df[col].fillna(0), n)
diff --git a/src/evidently/tests/recsys_tests.py b/src/evidently/tests/recsys_tests.py
index bc9b27671f..02c2568e21 100644
--- a/src/evidently/tests/recsys_tests.py
+++ b/src/evidently/tests/recsys_tests.py
@@ -1,7 +1,9 @@
import abc
from typing import ClassVar
+from typing import Generic
from typing import List
from typing import Optional
+from typing import TypeVar
from typing import Union
from evidently.metric_results import HistogramData
@@ -267,13 +269,16 @@ class TestMRRKRenderer(BaseTopkRecsysRenderer):
]
-class BaseNotRankRecsysTest(BaseCheckValueTest, abc.ABC):
+TBaseNotRankRecsysType = TypeVar("TBaseNotRankRecsysType")
+
+
+class BaseNotRankRecsysTest(Generic[TBaseNotRankRecsysType], BaseCheckValueTest, abc.ABC):
group: ClassVar = RECSYS_GROUP.id
header: str
k: int
min_rel_score: Optional[int]
item_features: Optional[List[str]]
- _metric: BaseNotRankRecsysType
+ _metric: TBaseNotRankRecsysType
def __init__(
self,
@@ -323,7 +328,7 @@ def get_description(self, value: Numeric) -> str:
return f"{self.header}@{self.k} is {value:.3}. The test threshold is {self.get_condition()}"
@abc.abstractmethod
- def get_metric(self, k, min_rel_score, item_features) -> BaseTopKRecsysType:
+ def get_metric(self, k, min_rel_score, item_features) -> TBaseNotRankRecsysType:
raise NotImplementedError()
@property
@@ -353,14 +358,14 @@ def render_html(self, obj: BaseNotRankRecsysTest) -> TestHtmlInfo:
return info
-class TestNovelty(BaseNotRankRecsysTest):
+class TestNovelty(BaseNotRankRecsysTest[NoveltyMetric]):
class Config:
type_alias = "evidently:test:TestNovelty"
name: ClassVar = "Novelty (top-k)"
header: str = "Novelty"
- def get_metric(self, k, min_rel_score, item_features) -> BaseNotRankRecsysType:
+ def get_metric(self, k, min_rel_score, item_features) -> NoveltyMetric:
return NoveltyMetric(k=k)
@@ -369,14 +374,14 @@ class TestNoveltyRenderer(BaseNotRankRecsysTestRenderer):
xaxis_name = "novelty by user"
-class TestDiversity(BaseNotRankRecsysTest):
+class TestDiversity(BaseNotRankRecsysTest[DiversityMetric]):
class Config:
type_alias = "evidently:test:TestDiversity"
name: ClassVar = "Diversity (top-k)"
header: str = "Diversity"
- def get_metric(self, k, min_rel_score, item_features) -> BaseNotRankRecsysType:
+ def get_metric(self, k, min_rel_score, item_features) -> DiversityMetric:
return DiversityMetric(k=k, item_features=item_features)
@@ -385,14 +390,14 @@ class TestDiversityRenderer(BaseNotRankRecsysTestRenderer):
xaxis_name = "intra list diversity by user"
-class TestSerendipity(BaseNotRankRecsysTest):
+class TestSerendipity(BaseNotRankRecsysTest[SerendipityMetric]):
class Config:
type_alias = "evidently:test:TestSerendipity"
name: ClassVar = "Serendipity (top-k)"
header: str = "Serendipity"
- def get_metric(self, k, min_rel_score, item_features) -> BaseNotRankRecsysType:
+ def get_metric(self, k, min_rel_score, item_features) -> SerendipityMetric:
return SerendipityMetric(k=k, min_rel_score=min_rel_score, item_features=item_features)
@@ -401,14 +406,14 @@ class TestSerendipityRenderer(BaseNotRankRecsysTestRenderer):
xaxis_name = "serendipity by user"
-class TestPersonalization(BaseNotRankRecsysTest):
+class TestPersonalization(BaseNotRankRecsysTest[PersonalizationMetric]):
class Config:
type_alias = "evidently:test:TestPersonalization"
name: ClassVar = "Personalization (top-k)"
header: str = "Personalization"
- def get_metric(self, k, min_rel_score, item_features) -> BaseNotRankRecsysType:
+ def get_metric(self, k, min_rel_score, item_features) -> PersonalizationMetric:
return PersonalizationMetric(k=k)
diff --git a/src/evidently/tests/utils.py b/src/evidently/tests/utils.py
index 4dad885b62..0cadbb5460 100644
--- a/src/evidently/tests/utils.py
+++ b/src/evidently/tests/utils.py
@@ -1,4 +1,5 @@
from typing import List
+from typing import Literal
from typing import Optional
from typing import Tuple
@@ -193,7 +194,7 @@ def dataframes_to_table(
columns: List[str],
table_id: str,
sort_by: str = "curr",
- na_position: str = "first",
+ na_position: Literal["first", "last"] = "first",
asc: bool = False,
):
display_columns = ["display"]
diff --git a/src/evidently/ui/app.py b/src/evidently/ui/app.py
index 8dd42872f7..4a6ce949ad 100644
--- a/src/evidently/ui/app.py
+++ b/src/evidently/ui/app.py
@@ -4,6 +4,7 @@
from evidently._pydantic_compat import SecretStr
from evidently.ui.components.base import AppBuilder
+from evidently.ui.components.storage import LocalStorageComponent
from evidently.ui.config import AppConfig
from evidently.ui.config import load_config
from evidently.ui.config import settings
@@ -37,6 +38,8 @@ def get_config(
config = load_config(LocalConfig, settings)
config.service.host = host
config.service.port = port
+ if not isinstance(config.storage, LocalStorageComponent):
+ raise ValueError("Storage component is not a LocalStorageComponent")
config.storage.path = workspace
secret = secret or os.environ.get(EVIDENTLY_SECRET_ENV)
diff --git a/src/evidently/ui/base.py b/src/evidently/ui/base.py
index abf8c54183..f0ed0796d5 100644
--- a/src/evidently/ui/base.py
+++ b/src/evidently/ui/base.py
@@ -1,16 +1,11 @@
-import asyncio
import contextlib
import datetime
import json
-import threading
from abc import ABC
from abc import abstractmethod
from enum import Enum
-from functools import wraps
from typing import IO
from typing import Any
-from typing import Awaitable
-from typing import Callable
from typing import ClassVar
from typing import Dict
from typing import Iterator
@@ -59,30 +54,7 @@
from evidently.utils import NumpyEncoder
from evidently.utils.dashboard import TemplateParams
from evidently.utils.dashboard import inline_iframe_html_template
-
-_loop = asyncio.new_event_loop()
-
-_thr = threading.Thread(target=_loop.run_forever, name="Async Runner", daemon=True)
-
-
-TA = TypeVar("TA")
-
-
-def async_to_sync(awaitable: Awaitable[TA]) -> TA:
- try:
- asyncio.get_running_loop()
- # we are in sync context but inside a running loop
- if not _thr.is_alive():
- _thr.start()
- future = asyncio.run_coroutine_threadsafe(awaitable, _loop)
- return future.result()
- except RuntimeError:
- new_loop = asyncio.new_event_loop()
- asyncio.set_event_loop(new_loop)
- try:
- return new_loop.run_until_complete(awaitable)
- finally:
- new_loop.close()
+from evidently.utils.sync import sync_api
class BlobMetadata(BaseModel):
@@ -188,14 +160,6 @@ def _default_dashboard():
return DashboardConfig(name="", panels=[])
-def sync_api(f: Callable[..., Awaitable[TA]]) -> Callable[..., TA]:
- @wraps(f)
- def sync_call(*args, **kwargs):
- return async_to_sync(f(*args, **kwargs))
-
- return sync_call
-
-
class Project(Entity):
entity_type: ClassVar[EntityType] = EntityType.Project
diff --git a/src/evidently/ui/config.py b/src/evidently/ui/config.py
index 53a35f00a0..f63f6695a8 100644
--- a/src/evidently/ui/config.py
+++ b/src/evidently/ui/config.py
@@ -37,7 +37,7 @@ def __init__(self, config: "Config", components_mapping: Dict[Type[Component], C
def get_component(self, type_: Type[T]) -> T:
for cls in self.components_mapping:
if issubclass(cls, type_):
- return self.components_mapping[cls]
+ return self.components_mapping[cls] # type: ignore[return-value]
raise ValueError(f"Component of type {type_.__name__} not found")
@property
diff --git a/src/evidently/ui/dashboards/reports.py b/src/evidently/ui/dashboards/reports.py
index 8548cd0573..832b60f073 100644
--- a/src/evidently/ui/dashboards/reports.py
+++ b/src/evidently/ui/dashboards/reports.py
@@ -156,7 +156,7 @@ async def build(
timestamp_start: Optional[datetime.datetime],
timestamp_end: Optional[datetime.datetime],
) -> BaseWidgetInfo:
- bins_for_hists: Dict[Metric, List[Tuple[datetime.datetime, Union[HistogramData, Distribution]]]] = (
+ bins_for_hists: Dict[Metric, List[Tuple[datetime.datetime, Union[HistogramData, Distribution]]]] = ( # type: ignore[assignment]
await data_storage.load_points_as_type(
Union[HistogramData, Distribution], # type: ignore[arg-type]
project_id,
diff --git a/src/evidently/ui/dashboards/test_suites.py b/src/evidently/ui/dashboards/test_suites.py
index 673f557c53..d7eb70efa3 100644
--- a/src/evidently/ui/dashboards/test_suites.py
+++ b/src/evidently/ui/dashboards/test_suites.py
@@ -186,7 +186,7 @@ def get_color(test, date) -> Optional[str]:
def to_period(time_agg: Optional[str], timestamp: datetime.datetime) -> datetime.datetime:
if time_agg is None:
return timestamp
- return pd.Series([timestamp], name="dt").dt.to_period(time_agg)[0]
+ return pd.Series([timestamp], name="dt").dt.to_period(time_agg)[0].to_timestamp()
@autoregister
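
`Series.dt.to_period(...)[0]` yields a pandas `Period`, not a timestamp; calling `.to_timestamp()` converts it back so the aggregated test-suite points keep a real datetime x-axis. A quick sketch:

```python
import datetime

import pandas as pd

timestamp = datetime.datetime(2024, 5, 17, 15, 30)
period = pd.Series([timestamp], name="dt").dt.to_period("D")[0]

print(type(period).__name__)   # Period
print(period.to_timestamp())   # 2024-05-17 00:00:00 -- a Timestamp again
```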
diff --git a/src/evidently/ui/workspace/view.py b/src/evidently/ui/workspace/view.py
index faa5a02d76..fcacb4c224 100644
--- a/src/evidently/ui/workspace/view.py
+++ b/src/evidently/ui/workspace/view.py
@@ -9,7 +9,6 @@
from evidently.suite.base_suite import Snapshot
from evidently.ui.base import Project
from evidently.ui.base import ProjectManager
-from evidently.ui.base import async_to_sync
from evidently.ui.type_aliases import STR_UUID
from evidently.ui.type_aliases import ZERO_UUID
from evidently.ui.type_aliases import DatasetID
@@ -17,6 +16,7 @@
from evidently.ui.type_aliases import TeamID
from evidently.ui.type_aliases import UserID
from evidently.ui.workspace.base import WorkspaceBase
+from evidently.utils.sync import async_to_sync
class WorkspaceView(WorkspaceBase):
diff --git a/src/evidently/utils/data_operations.py b/src/evidently/utils/data_operations.py
index d46896ae86..c433ae7ce5 100644
--- a/src/evidently/utils/data_operations.py
+++ b/src/evidently/utils/data_operations.py
@@ -73,7 +73,7 @@ def process_columns(dataset: pd.DataFrame, column_mapping: ColumnMapping) -> Dat
else:
num_feature_names = [col for col in num_feature_names if col in dataset.columns]
empty_cols = dataset[num_feature_names].isnull().mean()
- empty_cols = empty_cols[empty_cols == 1.0].index
+ empty_cols = empty_cols[empty_cols == 1.0].index.to_series()
num_feature_names = sorted(
list(set(dataset[num_feature_names].select_dtypes([np.number]).columns).union(set(empty_cols)))
)
@@ -82,7 +82,7 @@ def process_columns(dataset: pd.DataFrame, column_mapping: ColumnMapping) -> Dat
datetime_feature_names = sorted(list(set(dataset.select_dtypes(["datetime"]).columns) - utility_columns_set))
else:
empty_cols = dataset[datetime_feature_names].isnull().mean()
- empty_cols = empty_cols[empty_cols == 1.0].index
+ empty_cols = empty_cols[empty_cols == 1.0].index.to_series()
datetime_feature_names = sorted(
list(set(dataset[datetime_feature_names].select_dtypes(["datetime"]).columns).union(set(empty_cols)))
)
diff --git a/src/evidently/utils/data_preprocessing.py b/src/evidently/utils/data_preprocessing.py
index 4c84f9d652..d7ec1cf31f 100644
--- a/src/evidently/utils/data_preprocessing.py
+++ b/src/evidently/utils/data_preprocessing.py
@@ -586,7 +586,8 @@ def _get_column_type(
f" Returning type from reference"
)
cur_type = ref_type
- if not np.can_cast(cur_type, ref_type) and not np.can_cast(ref_type, cur_type):
+ # TODO: add proper type check
+ if not np.can_cast(cur_type, ref_type) and not np.can_cast(ref_type, cur_type): # type: ignore[arg-type]
logging.warning(
f"Column {column_name} have different types in reference {ref_type} and current {cur_type}."
f" Returning type from reference"
diff --git a/src/evidently/utils/llm/base.py b/src/evidently/utils/llm/base.py
index 2abf77b571..b7852062a0 100644
--- a/src/evidently/utils/llm/base.py
+++ b/src/evidently/utils/llm/base.py
@@ -3,7 +3,7 @@
from typing import Dict
-@dataclasses.dataclass
+@dataclasses.dataclass(unsafe_hash=True, frozen=True)
class LLMMessage:
role: str
content: str
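
Making `LLMMessage` a frozen, hashable dataclass keeps construction the same but lets messages be deduplicated or used as cache keys. A small sketch:

```python
from evidently.utils.llm.base import LLMMessage

m1 = LLMMessage(role="user", content="Hello")
m2 = LLMMessage(role="user", content="Hello")

assert m1 == m2
assert len({m1, m2}) == 1   # hashable, so usable in sets and as dict keys
# m1.content = "Hi"         # would now raise dataclasses.FrozenInstanceError
```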
diff --git a/src/evidently/utils/llm/wrapper.py b/src/evidently/utils/llm/wrapper.py
index ef26cdb68d..c858ec9a1e 100644
--- a/src/evidently/utils/llm/wrapper.py
+++ b/src/evidently/utils/llm/wrapper.py
@@ -20,9 +20,9 @@
from evidently._pydantic_compat import SecretStr
from evidently.options.base import Options
from evidently.options.option import Option
-from evidently.ui.base import sync_api
from evidently.utils.llm.base import LLMMessage
from evidently.utils.llm.errors import LLMRequestError
+from evidently.utils.sync import sync_api
TResult = TypeVar("TResult")
@@ -194,8 +194,8 @@ async def complete(self, messages: List[LLMMessage]) -> str:
messages = [{"role": msg.role, "content": msg.content} for msg in messages]
try:
response = await self.client.chat.completions.create(model=self.model, messages=messages) # type: ignore[arg-type]
- except openai.OpenAIError as e:
- raise LLMRequestError("Failed to call OpenAI complete API") from e
+ except openai.APIError as e:
+ raise LLMRequestError(f"Failed to call OpenAI complete API: {e.message}") from e
content = response.choices[0].message.content
assert content is not None # todo: better error
return content
diff --git a/src/evidently/utils/sync.py b/src/evidently/utils/sync.py
new file mode 100644
index 0000000000..81efbb5f8e
--- /dev/null
+++ b/src/evidently/utils/sync.py
@@ -0,0 +1,37 @@
+import asyncio
+import threading
+from functools import wraps
+from typing import Awaitable
+from typing import Callable
+from typing import TypeVar
+
+_loop = asyncio.new_event_loop()
+
+_thr = threading.Thread(target=_loop.run_forever, name="Async Runner", daemon=True)
+
+TA = TypeVar("TA")
+
+
+def async_to_sync(awaitable: Awaitable[TA]) -> TA:
+ try:
+ asyncio.get_running_loop()
+ # we are in sync context but inside a running loop
+ if not _thr.is_alive():
+ _thr.start()
+ future = asyncio.run_coroutine_threadsafe(awaitable, _loop)
+ return future.result()
+ except RuntimeError:
+ new_loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(new_loop)
+ try:
+ return new_loop.run_until_complete(awaitable)
+ finally:
+ new_loop.close()
+
+
+def sync_api(f: Callable[..., Awaitable[TA]]) -> Callable[..., TA]:
+ @wraps(f)
+ def sync_call(*args, **kwargs):
+ return async_to_sync(f(*args, **kwargs))
+
+ return sync_call
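
Usage sketch for the extracted helpers: `sync_api` wraps an async callable so synchronous code (including code already running inside an event loop, e.g. Jupyter) can call it directly; `fetch_value` is a hypothetical example.

```python
import asyncio

from evidently.utils.sync import async_to_sync, sync_api


@sync_api
async def fetch_value(delay: float) -> str:
    await asyncio.sleep(delay)
    return "done"


print(fetch_value(0.01))                   # called like a regular function
print(async_to_sync(asyncio.sleep(0.01)))  # wrapping a single awaitable returns its result (None here)
```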
diff --git a/src/evidently/utils/visualizations.py b/src/evidently/utils/visualizations.py
index f32180aa6c..64d7200e3f 100644
--- a/src/evidently/utils/visualizations.py
+++ b/src/evidently/utils/visualizations.py
@@ -579,7 +579,11 @@ def make_hist_for_num_plot(curr: pd.Series, ref: Optional[pd.Series] = None, cal
def plot_cat_cat_rel(
- curr: pd.DataFrame, ref: pd.DataFrame, target_name: str, feature_name: str, color_options: ColorOptions
+ curr: pd.DataFrame,
+ ref: Optional[pd.DataFrame],
+ target_name: str,
+ feature_name: str,
+ color_options: ColorOptions,
):
"""
Accepts current and reference data as pandas dataframes with two columns: feature_name and "count_objects".
@@ -684,23 +688,31 @@ def plot_num_num_rel(
def make_hist_for_cat_plot(curr: pd.Series, ref: pd.Series = None, normalize: bool = False, dropna=False) -> Histogram:
- hist_df = curr.astype(str).value_counts(normalize=normalize, dropna=dropna).reset_index()
- hist_df.columns = ["x", "count"]
+ hist_df = (
+ curr.astype(str)
+ .value_counts(normalize=normalize, dropna=dropna) # type: ignore[call-overload]
+ .reset_index()
+ )
+ hist_df.columns = pd.Index(["x", "count"])
current = HistogramData.from_df(hist_df)
reference = None
if ref is not None:
- hist_df = ref.astype(str).value_counts(normalize=normalize, dropna=dropna).reset_index()
- hist_df.columns = ["x", "count"]
+ hist_df = (
+ ref.astype(str)
+ .value_counts(normalize=normalize, dropna=dropna) # type: ignore[call-overload]
+ .reset_index()
+ )
+ hist_df.columns = pd.Index(["x", "count"])
reference = HistogramData.from_df(hist_df)
return Histogram(current=current, reference=reference)
def get_distribution_for_category_column(column: pd.Series, normalize: bool = False) -> Distribution:
- value_counts = column.value_counts(normalize=normalize, dropna=False)
+ value_counts = column.value_counts(normalize=normalize, dropna=False) # type: ignore[call-overload]
# filter out na values if it amount == 0
- new_values = [(k, v) for k, v in value_counts.items() if (not pd.isna(k) or v > 0)]
+ new_values = [(k, v) for k, v in value_counts.items() if (not pd.isna(k) or v > 0)] # type: ignore[call-overload]
return Distribution(
x=[x[0] for x in new_values],
@@ -1217,13 +1229,14 @@ def prepare_df_for_time_index_plot(
if datetime_name is not None:
if prefix is None and freq is None:
prefix, freq = choose_agg_period(df[datetime_name], None)
- plot_df = df.copy()
- plot_df["per"] = plot_df[datetime_name].dt.to_period(freq=freq)
- plot_df = plot_df.groupby("per")[column_name].agg(["mean", "std"]).reset_index()
- plot_df["per"] = plot_df["per"].dt.to_timestamp()
- return plot_df, prefix
- plot_df = df[column_name].reset_index().sort_values(index_name)
- plot_df["per"] = pd.cut(plot_df[index_name], OPTIMAL_POINTS if bins is None else bins, labels=False)
+ dt_plot_df: pd.DataFrame = df.copy()
+ dt_plot_df["per"] = dt_plot_df[datetime_name].dt.to_period(freq=freq)
+ dt_plot_df = dt_plot_df.groupby("per")[column_name].agg(["mean", "std"]).reset_index()
+ dt_plot_df["per"] = dt_plot_df["per"].dt.to_timestamp()
+ return dt_plot_df, prefix
+ plot_df: pd.DataFrame = df[column_name].reset_index().sort_values(index_name)
+ new_bins = OPTIMAL_POINTS if bins is None else bins
+ plot_df["per"] = pd.cut(plot_df[index_name], bins=new_bins, labels=False) # type: ignore[call-overload]
plot_df = plot_df.groupby("per")[column_name].agg(["mean", "std"]).reset_index()
return plot_df, None
@@ -1389,10 +1402,10 @@ def plot_metric_k(curr_data: pd.Series, ref_data: Optional[pd.Series], yaxis_nam
def plot_bias(
- curr: Distribution,
- curr_train: Distribution,
- ref: Optional[Distribution],
- ref_train: Optional[Distribution],
+ curr: HistogramData,
+ curr_train: HistogramData,
+ ref: Optional[HistogramData],
+ ref_train: Optional[HistogramData],
xaxis_name: str,
):
color_options = ColorOptions()
@@ -1444,10 +1457,10 @@ def plot_bias(
def plot_4_distr(
- curr_1: Distribution,
- curr_2: Optional[Distribution],
- ref_1: Optional[Distribution],
- ref_2: Optional[Distribution],
+ curr_1: HistogramData,
+ curr_2: Optional[HistogramData],
+ ref_1: Optional[HistogramData],
+ ref_2: Optional[HistogramData],
name_1: str,
name_2: str,
xaxis_name: str,
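
In the reworked `prepare_df_for_time_index_plot`, the no-datetime branch cuts the row index into `OPTIMAL_POINTS` bins and aggregates mean/std per bin. A standalone sketch of that branch (names, data, and the constant's value are illustrative):

```python
import numpy as np
import pandas as pd

OPTIMAL_POINTS = 150  # illustrative; the library defines its own constant
df = pd.DataFrame({"value": np.random.default_rng(0).normal(size=1_000)})

plot_df = df["value"].reset_index().sort_values("index")
plot_df["per"] = pd.cut(plot_df["index"], bins=OPTIMAL_POINTS, labels=False)
plot_df = plot_df.groupby("per")["value"].agg(["mean", "std"]).reset_index()
print(plot_df.head())  # one mean/std row per index bin
```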
diff --git a/tests/features/test_contains_link_feature.py b/tests/features/test_contains_link_feature.py
new file mode 100644
index 0000000000..ab95f05d3a
--- /dev/null
+++ b/tests/features/test_contains_link_feature.py
@@ -0,0 +1,35 @@
+import pandas as pd
+
+from evidently.features.contains_link_feature import ContainsLink
+from evidently.pipeline.column_mapping import ColumnMapping
+from evidently.utils.data_preprocessing import create_data_definition
+
+
+def test_contains_link_feature():
+ # Initialize the ContainsLink feature generator for column_1
+ feature_generator = ContainsLink("column_1")
+
+ # Sample data with varying texts that contain or don't contain links
+ data = pd.DataFrame(
+ dict(
+ column_1=[
+ "Check out https://example.com for more info", # Contains a valid link
+ "Visit our website at http://www.test.com.", # Contains a valid link
+ "No link here, just plain text", # No link
+ "Another string without a link", # No link
+ "Here is a malformed link: www.test.com", # Invalid link (missing scheme)
+ ]
+ )
+ )
+
+ # Generate the feature
+ result = feature_generator.generate_feature(
+ data=data,
+ data_definition=create_data_definition(None, data, ColumnMapping()),
+ )
+
+ # Expected result: True for valid links, False otherwise
+ expected_result = pd.DataFrame(dict(column_1=[True, True, False, False, False]))
+
+ # Assert that the generated result matches the expected result
+ assert result.equals(expected_result)
diff --git a/tests/features/test_exact_feature.py b/tests/features/test_exact_feature.py
new file mode 100644
index 0000000000..cac6f5f0d0
--- /dev/null
+++ b/tests/features/test_exact_feature.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import pytest
+
+from evidently.features.exact_match_feature import ExactMatchFeature
+from evidently.pipeline.column_mapping import ColumnMapping
+from evidently.utils.data_preprocessing import create_data_definition
+
+
+@pytest.mark.parametrize(
+ ("value1", "value2", "expected"),
+ [
+ ("this is same", "this is same", True),
+ ("this is same", "this is different", False),
+ ],
+)
+def test_exact_match_feature(value1: str, value2: str, expected: bool):
+ feature_generator = ExactMatchFeature(columns=["column_1", "column_2"])
+ data = pd.DataFrame(dict(column_1=[value1], column_2=[value2]))
+ result = feature_generator.generate_feature(
+ data=data, data_definition=create_data_definition(None, data, ColumnMapping())
+ )
+ expected_df = pd.DataFrame([[expected]], columns=["column_1|column_2"])
+ pd.testing.assert_frame_equal(result, expected_df)
diff --git a/tests/features/test_is_valid_json_feature.py b/tests/features/test_is_valid_json_feature.py
new file mode 100644
index 0000000000..e3078cbcee
--- /dev/null
+++ b/tests/features/test_is_valid_json_feature.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import pytest
+
+from evidently.features.is_valid_json_feature import IsValidJSON
+from evidently.pipeline.column_mapping import ColumnMapping
+from evidently.utils.data_preprocessing import create_data_definition
+
+
+@pytest.mark.parametrize(
+ ("item", "expected"),
+ [
+ ('{"test": "abc"}', True),
+ ("not json", False),
+ ],
+)
+def test_is_valid_json_feature(item: str, expected: bool):
+ feature_generator = IsValidJSON("column_1")
+ data = pd.DataFrame(dict(column_1=[item]))
+ result = feature_generator.generate_feature(
+ data=data,
+ data_definition=create_data_definition(None, data, ColumnMapping()),
+ )
+ assert result.equals(pd.DataFrame(dict(column_1=[expected])))
diff --git a/tests/features/test_is_valid_python_feature.py b/tests/features/test_is_valid_python_feature.py
new file mode 100644
index 0000000000..d95005e6b3
--- /dev/null
+++ b/tests/features/test_is_valid_python_feature.py
@@ -0,0 +1,54 @@
+from typing import Any
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from evidently.features.is_valid_python_feature import IsValidPython
+
+
+@pytest.mark.parametrize(
+ ("column_value", "expected"),
+ [
+ ("print('Hello')", True),
+ ("x = 5 + 3", True),
+ ("def foo():\n return 'bar'", True),
+ ("for i in range(10): print(i)", True),
+ ("print('Hello'", False),
+ ("for i in range(5) print(i)", False),
+ ("def foo(\n return 'bar'", False),
+ ("if True print('yes')", False),
+ (None, False),
+ ("12", True),
+ ("Sorry I can't answer this", False),
+ ("{'name': 'test', 'age': 13}", True),
+ ],
+)
+def test_is_valid_python_apply(column_value: Any, expected: bool):
+ is_python = IsValidPython("TestColumnName")
+ actual = is_python.apply(column_value)
+ assert actual == expected
+
+
+test_data = pd.DataFrame(
+ {
+ "TestColumnName": [
+ "print('Hello')",
+ "def foo():\n return 'bar'",
+ "def foo(\n return 'bar'",
+ None,
+ "{'name': 'test', 'age': 13}",
+ np.nan,
+ ]
+ }
+)
+
+
+@pytest.mark.parametrize(
+ ("expected"),
+ [[True, True, False, False, True, False]],
+)
+def test_is_valid_python(expected: bool):
+ is_python = IsValidPython("TestColumnName")
+ actual = is_python.generate_feature(test_data, None)
+ assert actual[is_python._feature_column_name()].tolist() == expected
diff --git a/tests/features/test_json_match.py b/tests/features/test_json_match.py
new file mode 100644
index 0000000000..397713d87d
--- /dev/null
+++ b/tests/features/test_json_match.py
@@ -0,0 +1,46 @@
+import pandas as pd
+
+from evidently.features.json_match_feature import JSONMatch
+from evidently.pipeline.column_mapping import ColumnMapping
+from evidently.utils.data_preprocessing import create_data_definition
+
+
+def test_json_match_feature():
+ feature_generator = JSONMatch(
+ first_column="col_1", second_column="col_2", display_name="Json Match", feature_type="num", name="is_json_match"
+ )
+
+ # Define JSON strings for each scenario
+ scenarios = [
+ # Scenario 1 - Matching JSONs
+ ('{"name": "Alice", "age": 25, "city": "London"}', '{"city": "London", "age": 25, "name": "Alice"}'),
+ # Scenario 2 - Different whitespace (still matching)
+ ('{ "name" : "Bob" , "age" : 22 , "city" : "Paris" }', '{"city": "Paris", "name": "Bob", "age": 22}'),
+ # Scenario 3 - Invalid JSON in one column
+ (
+ '{"name": "Eve", "age": 28, "city": "Berlin"}',
+ '{"city": "Berlin", "age": 28, "name": Eve}',
+ ), # Missing quotes around "Eve"
+ # Scenario 4 - Keys mismatch
+ (
+ '{"name": "Charlie", "age": 30, "country": "USA"}',
+ '{"name": "Charlie", "age": 30, "city": "USA"}',
+ ), # 'country' vs 'city'
+ # Scenario 5 - Values mismatch
+ (
+ '{"name": "David", "age": 35, "city": "Tokyo"}',
+ '{"city": "Tokyo", "age": 35, "name": "Daniel"}',
+ ), # 'David' vs 'Daniel'
+ ]
+
+ # Create DataFrame
+ data = pd.DataFrame(scenarios, columns=["col_1", "col_2"])
+
+ result = feature_generator.generate_feature(
+ data=data,
+ data_definition=create_data_definition(None, data, ColumnMapping()),
+ )
+
+ expected_result = pd.DataFrame(dict(is_json_match=[True, True, False, False, False]))
+
+    assert result.equals(expected_result)
diff --git a/tests/features/test_llm_judge.py b/tests/features/test_llm_judge.py
index 030891285e..96104912ff 100644
--- a/tests/features/test_llm_judge.py
+++ b/tests/features/test_llm_judge.py
@@ -9,14 +9,17 @@
import pandas as pd
import pytest
+from evidently.descriptors import NegativityLLMEval
from evidently.features.llm_judge import BinaryClassificationPromptTemplate
from evidently.features.llm_judge import LLMJudge
from evidently.features.llm_judge import LLMMessage
-from evidently.features.llm_judge import LLMResponseParseError
from evidently.features.llm_judge import LLMWrapper
-from evidently.features.llm_judge import llm_provider
+from evidently.metric_preset import TextEvals
from evidently.options.base import Options
+from evidently.report import Report
from evidently.utils.data_preprocessing import DataDefinition
+from evidently.utils.llm.errors import LLMResponseParseError
+from evidently.utils.llm.wrapper import llm_provider
def _LLMPromptTemplate(
@@ -74,14 +77,15 @@ def _LLMPromptTemplate(
],
)
def test_parse_response(
- template: _LLMPromptTemplate, results: Dict[str, Union[LLMResponseParseError, Dict[str, Union[str, float]]]]
+ template: BinaryClassificationPromptTemplate,
+ results: Dict[str, Union[LLMResponseParseError, Dict[str, Union[str, float]]]],
):
for response, expected_result in results.items():
if isinstance(expected_result, LLMResponseParseError):
with pytest.raises(expected_result.__class__):
- template.parse_response(response)
+ template.parse(response)
else:
- assert template.parse_response(response) == expected_result
+ assert template.parse(response) == expected_result
@llm_provider("mock", None)
@@ -89,12 +93,13 @@ class MockLLMWrapper(LLMWrapper):
def __init__(self, model: str, options: Options):
self.model = model
- def complete(self, messages: List[LLMMessage]) -> str:
- text = messages[-1][1]
+ async def complete(self, messages: List[LLMMessage]) -> str:
+ text = messages[-1].content
cat = re.findall("___text_starts_here___\n(.*)\n___text_ends_here___", text)[0][0]
return json.dumps({"category": cat})
+@pytest.mark.asyncio
def test_llm_judge():
llm_judge = LLMJudge(
input_column="text",
@@ -110,6 +115,7 @@ def test_llm_judge():
pd.testing.assert_frame_equal(fts, pd.DataFrame({"category": ["A", "B"]}))
+@pytest.mark.asyncio
def test_multicol_llm_judge():
llm_judge = LLMJudge(
input_columns={"text": "input", "text2": "input2"},
@@ -123,3 +129,42 @@
dd = DataDefinition(columns={}, reference_present=False)
fts = llm_judge.generate_features(data, dd, Options())
pd.testing.assert_frame_equal(fts, pd.DataFrame({"category": ["A", "B"]}))
+
+
+def test_run_snapshot_with_llm_judge():
+ data = pd.DataFrame({"text": ["A", "B"], "text2": ["C", "D"]})
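+    # provider="mock" resolves to the MockLLMWrapper registered above, so no real LLM calls are made.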
+ neg_eval = NegativityLLMEval(
+ input_columns={"text": "input", "text2": "input2"},
+ provider="mock",
+ model="",
+ template=BinaryClassificationPromptTemplate(target_category="A", non_target_category="B"),
+ )
+ report = Report(metrics=[TextEvals("text", descriptors=[neg_eval])])
+
+ report.run(current_data=data, reference_data=None)
+ report._inner_suite.raise_for_error()
+ assert report.as_dict() == {
+ "metrics": [
+ {
+ "metric": "ColumnSummaryMetric",
+ "result": {
+ "column_name": "Negativity category",
+ "column_type": "cat",
+ "current_characteristics": {
+ "count": 2,
+ "missing": 0,
+ "missing_percentage": 0.0,
+ "most_common": "A",
+ "most_common_percentage": 50.0,
+ "new_in_current_values_count": None,
+ "number_of_rows": 2,
+ "unique": 2,
+ "unique_percentage": 100.0,
+ "unused_in_current_values_count": None,
+ },
+ "reference_characteristics": None,
+ },
+ }
+ ]
+ }
diff --git a/tests/features/test_words_feature.py b/tests/features/test_words_feature.py
index 96b5617d0c..d58344e723 100644
--- a/tests/features/test_words_feature.py
+++ b/tests/features/test_words_feature.py
@@ -5,6 +5,8 @@
from evidently.features.words_feature import ExcludesWords
from evidently.features.words_feature import IncludesWords
+from evidently.features.words_feature import WordMatch
+from evidently.features.words_feature import WordNoMatch
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.utils.data_preprocessing import create_data_definition
@@ -51,3 +53,85 @@
data_definition=create_data_definition(None, data, ColumnMapping()),
)
assert result.equals(pd.DataFrame(dict([(feature_generator._feature_column_name(), expected)])))
+
+
+@pytest.mark.parametrize(
+ ["mode", "lemmatize", "expected"],
+ [
+ ("any", False, [True, True, False, False, False, True]),
+ ("all", False, [False, True, False, False, False, True]),
+ ("any", True, [False, False, True, True, False, True]),
+ ("all", True, [False, False, True, False, False, True]),
+ ],
+)
+def test_word_match(mode: str, lemmatize: bool, expected: List[bool]):
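+    # Each "expected" tuple lists words to look for in "generated"; lemmatize=True also matches inflected forms (e.g. "apples" vs "apple").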
+ data = {
+ "generated": [
+ "I love eating apples and grapes.",
+ "I eat apples, grapes, and oranges",
+ "Grapes, oranges, apples.",
+ "Oranges are more sour than grapes.",
+ "This test doesn't have the words.",
+ "You are allowed to cancel at any time, and we guarantee that you will receive a refund.",
+ ],
+ "expected": [
+ ["apples", "grapes", "oranges"],
+ ["grapes", "apples", "oranges"],
+ ["apple", "orange", "grape"],
+ ["orange", "sweet", "grape"],
+ ["none", "of", "these"],
+ ["guarantee", "allowed", "refund"],
+ ],
+ }
+ df = pd.DataFrame(data)
+ df["expected"] = df["expected"].apply(tuple)
+ feature_generator = WordMatch(columns=["generated", "expected"], mode=mode, lemmatize=lemmatize)
+ result = feature_generator.generate_feature(
+ data=df,
+ data_definition=create_data_definition(None, df, ColumnMapping()),
+ )
+ assert result.equals(pd.DataFrame(dict([(feature_generator._feature_name(), expected)])))
+ column_obj = feature_generator._as_column()
+ assert column_obj.display_name == f"Text contains {mode} defined words"
+
+
+@pytest.mark.parametrize(
+ ["mode", "lemmatize", "expected"],
+ [
+ ("any", False, [True, False, True, True, True, False]),
+ ("all", False, [False, False, True, True, True, False]),
+ ("any", True, [True, True, False, True, True, False]),
+ ("all", True, [True, True, False, False, True, False]),
+ ],
+)
+def test_word_no_match(mode: str, lemmatize: bool, expected: List[bool]):
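+    # Each "forbidden" tuple lists words that should be absent from "generated".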
+ data = {
+ "generated": [
+ "I love eating apples and grapes.",
+ "I eat apples, grapes, and oranges",
+ "Grapes, oranges, apples.",
+ "Oranges are more sour than grapes.",
+ "This test doesn't have the words.",
+ "You are allowed to cancel at any time, and we guarantee that you will receive a refund.",
+ ],
+ "forbidden": [
+ ["apples", "grapes", "oranges"],
+ ["grapes", "apples", "oranges"],
+ ["apple", "orange", "grape"],
+ ["orange", "sweet", "grape"],
+ ["none", "of", "these"],
+ ["guarantee", "allowed", "refund"],
+ ],
+ }
+ df = pd.DataFrame(data)
+ df["forbidden"] = df["forbidden"].apply(tuple)
+ feature_generator = WordNoMatch(columns=["generated", "forbidden"], mode=mode, lemmatize=lemmatize)
+ result = feature_generator.generate_feature(
+ data=df,
+ data_definition=create_data_definition(None, df, ColumnMapping()),
+ )
+ assert result.equals(pd.DataFrame(dict([(feature_generator._feature_name(), expected)])))
+ column_obj = feature_generator._as_column()
+ assert column_obj.display_name == f"Text does not contain {mode} defined words"
diff --git a/tests/test_preset/test_data_drift_preset.py b/tests/test_preset/test_data_drift_preset.py
new file mode 100644
index 0000000000..dfbbd05241
--- /dev/null
+++ b/tests/test_preset/test_data_drift_preset.py
@@ -0,0 +1,25 @@
+from evidently import ColumnType
+from evidently.test_preset import DataDriftTestPreset
+from evidently.tests import TestAllFeaturesValueDrift
+from evidently.tests import TestEmbeddingsDrift
+from evidently.tests import TestShareOfDriftedColumns
+from evidently.utils.data_preprocessing import ColumnDefinition
+from evidently.utils.data_preprocessing import DataDefinition
+
+
+def test_embeddings_data_drift_preset():
+ data_definition = DataDefinition(
+ columns={
+ "target": ColumnDefinition("target", ColumnType.Numerical),
+ },
+ embeddings={
+ "small_set": ["col_1", "col_2"],
+ "big_set": ["col_3", "col_4"],
+ },
+ reference_present=True,
+ )
+ preset = DataDriftTestPreset(embeddings=["small_set", "big_set"])
+ tests = preset.generate_tests(data_definition=data_definition, additional_data=None)
+ assert len(tests) == 4
+ expected_tests = [TestShareOfDriftedColumns, TestAllFeaturesValueDrift, TestEmbeddingsDrift, TestEmbeddingsDrift]
+ assert expected_tests == [type(test) for test in tests]
diff --git a/tests/test_preset/test_no_target_performance_preset.py b/tests/test_preset/test_no_target_performance_preset.py
new file mode 100644
index 0000000000..1ef1ecd025
--- /dev/null
+++ b/tests/test_preset/test_no_target_performance_preset.py
@@ -0,0 +1,38 @@
+from evidently import ColumnType
+from evidently.test_preset import NoTargetPerformanceTestPreset
+from evidently.tests import TestAllColumnsShareOfMissingValues
+from evidently.tests import TestCatColumnsOutOfListValues
+from evidently.tests import TestColumnsType
+from evidently.tests import TestEmbeddingsDrift
+from evidently.tests import TestNumColumnsMeanInNSigmas
+from evidently.tests import TestNumColumnsOutOfRangeValues
+from evidently.tests import TestShareOfDriftedColumns
+from evidently.utils.data_preprocessing import ColumnDefinition
+from evidently.utils.data_preprocessing import DataDefinition
+
+
+def test_embeddings_no_target_performance_preset():
+ data_definition = DataDefinition(
+ columns={
+ "target": ColumnDefinition("target", ColumnType.Numerical),
+ },
+ embeddings={
+ "small_set": ["col_1", "col_2"],
+ "big_set": ["col_3", "col_4"],
+ },
+ reference_present=True,
+ )
+ preset = NoTargetPerformanceTestPreset(embeddings=["small_set", "big_set"])
+ tests = preset.generate_tests(data_definition=data_definition, additional_data=None)
+ assert len(tests) == 8
+ expected_tests = [
+ TestShareOfDriftedColumns,
+ TestColumnsType,
+ TestAllColumnsShareOfMissingValues,
+ TestNumColumnsOutOfRangeValues,
+ TestCatColumnsOutOfListValues,
+ TestNumColumnsMeanInNSigmas,
+ TestEmbeddingsDrift,
+ TestEmbeddingsDrift,
+ ]
+ assert expected_tests == [type(test) for test in tests]