diff --git a/src/evidently/features/text_contains_feature.py b/src/evidently/features/text_contains_feature.py index 6b909b95c0..f1b9e6bd33 100644 --- a/src/evidently/features/text_contains_feature.py +++ b/src/evidently/features/text_contains_feature.py @@ -103,7 +103,7 @@ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) def _as_column(self) -> ColumnName: return self._create_column( self._feature_column_name(), - default_display_name=f"Text Contains of {self.mode} [{', '.join(self.items)}] for {self.column_name}", + default_display_name=f"Text Does Not Contain of {self.mode} [{', '.join(self.items)}] for {self.column_name}", ) def comparison(self, item: str, string: str): diff --git a/src/evidently/v2/datasets.py b/src/evidently/v2/datasets.py index 7aa9cc489f..e24108edb9 100644 --- a/src/evidently/v2/datasets.py +++ b/src/evidently/v2/datasets.py @@ -37,7 +37,7 @@ def __init__(self, type: ColumnType, data: pd.Series) -> None: self.data = data -class Scorer: +class Descriptor: def __init__(self, alias: str): self._alias = alias @@ -50,7 +50,7 @@ def alias(self) -> str: return self._alias -class FeatureScorer(Scorer): +class FeatureDescriptor(Descriptor): def __init__(self, feature: GeneratedFeatures, alias: Optional[str] = None): super().__init__(alias or f"{feature.as_column().display_name}") self._feature = feature @@ -67,7 +67,7 @@ def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, Da return DatasetColumn(type=self._feature.get_type(), data=feature[feature.columns[0]]) -def _determine_scorer_column_name(alias: str, columns: List[str]): +def _determine_desccriptor_column_name(alias: str, columns: List[str]): index = 1 key = alias while key in columns: @@ -84,12 +84,12 @@ def from_pandas( cls, data: pd.DataFrame, data_definition: Optional[DataDefinition] = None, - scorers: Optional[List[Scorer]] = None, + descriptors: Optional[List[Descriptor]] = None, ) -> "Dataset": dataset = PandasDataset(data, data_definition) - for scorer in scorers or []: - key = _determine_scorer_column_name(scorer.alias, data.columns) - new_column = scorer.generate_data(dataset) + for descriptor in descriptors or []: + key = _determine_desccriptor_column_name(descriptor.alias, data.columns) + new_column = descriptor.generate_data(dataset) if isinstance(new_column, DatasetColumn): data[key] = new_column.data elif len(new_column) > 1: diff --git a/src/evidently/v2/scorers/__init__.py b/src/evidently/v2/scorers/__init__.py index 4efe430305..2a53f5618e 100644 --- a/src/evidently/v2/scorers/__init__.py +++ b/src/evidently/v2/scorers/__init__.py @@ -1,41 +1,42 @@ -from ._custom_scorers import CustomColumnScorer -from ._custom_scorers import CustomScorer +from ._custom_descriptors import CustomColumnDescriptor +from ._custom_descriptors import CustomDescriptor from ._text_length import TextLength -from .generated_scorers import begins_with -from .generated_scorers import bert_score -from .generated_scorers import contains -from .generated_scorers import contains_link -from .generated_scorers import does_not_contain -from .generated_scorers import ends_with -from .generated_scorers import exact_match -from .generated_scorers import excludes_words -from .generated_scorers import hugging_face -from .generated_scorers import hugging_face_toxicity -from .generated_scorers import includes_words -from .generated_scorers import is_valid_json -from .generated_scorers import is_valid_python -from .generated_scorers import item_match -from .generated_scorers import item_no_match -from .generated_scorers import json_match -from .generated_scorers import json_schema_match -from .generated_scorers import llm_judge -from .generated_scorers import non_letter_character_percentage -from .generated_scorers import oov_words_percentage -from .generated_scorers import openai -from .generated_scorers import reg_exp -from .generated_scorers import semantic_similarity -from .generated_scorers import sentence_count -from .generated_scorers import sentiment -from .generated_scorers import text_length -from .generated_scorers import trigger_words_present -from .generated_scorers import word_count -from .generated_scorers import word_match -from .generated_scorers import word_no_match -from .generated_scorers import words_presence +from .generated_descriptors import begins_with +from .generated_descriptors import bert_score +from .generated_descriptors import contains +from .generated_descriptors import contains_link +from .generated_descriptors import does_not_contain +from .generated_descriptors import ends_with +from .generated_descriptors import exact_match +from .generated_descriptors import excludes_words +from .generated_descriptors import hugging_face +from .generated_descriptors import hugging_face_toxicity +from .generated_descriptors import includes_words +from .generated_descriptors import is_valid_json +from .generated_descriptors import is_valid_python +from .generated_descriptors import is_valid_sql +from .generated_descriptors import item_match +from .generated_descriptors import item_no_match +from .generated_descriptors import json_match +from .generated_descriptors import json_schema_match +from .generated_descriptors import llm_judge +from .generated_descriptors import non_letter_character_percentage +from .generated_descriptors import oov_words_percentage +from .generated_descriptors import openai +from .generated_descriptors import reg_exp +from .generated_descriptors import semantic_similarity +from .generated_descriptors import sentence_count +from .generated_descriptors import sentiment +from .generated_descriptors import text_length +from .generated_descriptors import trigger_words_present +from .generated_descriptors import word_count +from .generated_descriptors import word_match +from .generated_descriptors import word_no_match +from .generated_descriptors import words_presence __all__ = [ - "CustomColumnScorer", - "CustomScorer", + "CustomColumnDescriptor", + "CustomDescriptor", "TextLength", "bert_score", "begins_with", @@ -50,6 +51,7 @@ "includes_words", "is_valid_json", "is_valid_python", + "is_valid_sql", "item_match", "item_no_match", "json_match", diff --git a/src/evidently/v2/scorers/_custom_scorers.py b/src/evidently/v2/scorers/_custom_descriptors.py similarity index 88% rename from src/evidently/v2/scorers/_custom_scorers.py rename to src/evidently/v2/scorers/_custom_descriptors.py index 8db15bac38..7e1042471a 100644 --- a/src/evidently/v2/scorers/_custom_scorers.py +++ b/src/evidently/v2/scorers/_custom_descriptors.py @@ -5,10 +5,10 @@ from evidently.v2.datasets import Dataset from evidently.v2.datasets import DatasetColumn -from evidently.v2.datasets import Scorer +from evidently.v2.datasets import Descriptor -class CustomColumnScorer(Scorer): +class CustomColumnDescriptor(Descriptor): def __init__(self, column_name: str, func: Callable[[DatasetColumn], DatasetColumn], alias: Optional[str] = None): super().__init__(alias) self._column_name = column_name @@ -19,7 +19,7 @@ def generate_data(self, dataset: Dataset) -> Union[DatasetColumn, Dict[str, Data return self._func(column_data) -class CustomScorer(Scorer): +class CustomDescriptor(Descriptor): def __init__( self, func: Callable[[Dataset], Union[DatasetColumn, Dict[str, DatasetColumn]]], alias: Optional[str] = None ): diff --git a/src/evidently/v2/scorers/_generate_scorers.py b/src/evidently/v2/scorers/_generate_descriptors.py similarity index 92% rename from src/evidently/v2/scorers/_generate_scorers.py rename to src/evidently/v2/scorers/_generate_descriptors.py index 9050ad82c7..7f1fb577eb 100644 --- a/src/evidently/v2/scorers/_generate_scorers.py +++ b/src/evidently/v2/scorers/_generate_descriptors.py @@ -19,9 +19,9 @@ from evidently.features.generated_features import GeneratedFeatures from evidently.features.llm_judge import BaseLLMPromptTemplate from evidently.pydantic_utils import TYPE_ALIASES -from evidently.v2.datasets import FeatureScorer +from evidently.v2.datasets import FeatureDescriptor -SOURCE_FILE = "generated_scorers.py" +SOURCE_FILE = "generated_descriptors.py" REPLACES = { "pandas.core.frame.DataFrame": "DataFrame", @@ -29,7 +29,11 @@ "pandas.core.series.Series": "Series", } -NAME_MAPPING = {"open_a_i_feature": "openai_feature", "is_valid_j_s_o_n": "is_valid_json"} +NAME_MAPPING = { + "open_a_i_feature": "openai_feature", + "is_valid_j_s_o_n": "is_valid_json", + "is_valid_s_q_l": "is_valid_sql", +} SKIP_CLASSES = {CustomFeature, CustomPairColumnFeature, CustomSingleColumnFeature} @@ -80,7 +84,7 @@ def get_args_kwargs(feature_class: Type[GeneratedFeatures]) -> Tuple[Dict[str, s return args, kwargs -def create_scorer_function(feature_class: Type[GeneratedFeatures]): +def create_descriptor_function(feature_class: Type[GeneratedFeatures]): class_name = feature_class.__name__ cmpx = os.path.commonprefix([class_name, class_name.upper()])[:-2] name = cmpx.lower() + re.sub(r"(?\n\n
\n | str1 | \nstr2 | \nText Begins with [first] for str1 | \nText Begins with [first] for str1_1 | \nBERTScore for str1 str2. | \nText Contains of any [first] for str1 | \nstr2 contains link | \nText Contains of any [second] for str1 | \nText Ends with [value] for str1 | \nExact Match for str1 str2. | \n... | \nOOV Words % for str1 | \nRegExp '.*value' Match for column str1 | \nSemantic Similarity for str1 str2. | \nSentence Count for str1 | \nSentiment for str1 | \nText Length for str1 | \nTriggerWordsPresent [words: ['first'], lemmatize: True] for str1 | \nWord Count for str1 | \nText contains includes defined words | \nText does not contain includes defined words | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \nfirst value | \nfirst value | \nTrue | \nTrue | \n0.859864 | \nTrue | \nFalse | \nTrue | \nTrue | \nTrue | \n... | \n0.0 | \n1 | \n1.000000 | \n1 | \n0.34 | \n11 | \n1 | \n2 | \nFalse | \nTrue | \n
1 | \nsecond value | \nhttp://localhost.com | \nFalse | \nFalse | \n0.442877 | \nFalse | \nTrue | \nFalse | \nTrue | \nFalse | \n... | \n0.0 | \n1 | \n0.490672 | \n1 | \n0.34 | \n12 | \n0 | \n2 | \nFalse | \nTrue | \n
2 | \nthird value | \n{\"a\":\"b\"} | \nFalse | \nFalse | \n0.494391 | \nFalse | \nFalse | \nTrue | \nTrue | \nFalse | \n... | \n0.0 | \n1 | \n0.590310 | \n1 | \n0.34 | \n11 | \n0 | \n2 | \nFalse | \nTrue | \n
3 | \nfourth value | \nimport evidently | \nFalse | \nFalse | \n0.658955 | \nFalse | \nFalse | \nTrue | \nTrue | \nFalse | \n... | \n0.0 | \n1 | \n0.548347 | \n1 | \n0.34 | \n12 | \n0 | \n2 | \nFalse | \nTrue | \n
4 rows × 27 columns
\n" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.as_dataframe()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-17T02:42:19.272157Z", + "start_time": "2024-12-17T02:42:19.250853Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}