From f4ec3fe9ad22ee9c2a133774c309872e90b8696f Mon Sep 17 00:00:00 2001 From: Khor Shu Heng Date: Mon, 5 Feb 2024 10:28:50 +0800 Subject: [PATCH] feat: reuse the same bigquery table for multiple model versions for Arize BigQuery Sink --- python/observation-publisher/Makefile | 7 +- .../publisher/__main__.py | 1 + .../observation-publisher/publisher/metric.py | 3 +- .../publisher/observation_sink.py | 169 ++++++++++--- .../publisher/prediction_log_consumer.py | 34 ++- .../publisher/prediction_log_parser.py | 3 + python/observation-publisher/pyproject.toml | 3 + .../requirements-dev.txt | 3 +- python/observation-publisher/requirements.in | 2 +- python/observation-publisher/requirements.txt | 12 +- .../observation-publisher/tests/__init__.py | 0 .../tests/common_fixtures.py | 13 + .../tests/test_config.py | 11 +- .../tests/test_observation_sink.py | 232 ++++++++++++++---- .../tests/test_prediction_log_consumer.py | 40 +-- 15 files changed, 404 insertions(+), 129 deletions(-) create mode 100644 python/observation-publisher/tests/__init__.py create mode 100644 python/observation-publisher/tests/common_fixtures.py diff --git a/python/observation-publisher/Makefile b/python/observation-publisher/Makefile index d30f50b97..eaf3aa0b1 100644 --- a/python/observation-publisher/Makefile +++ b/python/observation-publisher/Makefile @@ -14,7 +14,12 @@ pip-compile: .PHONY: test test: @echo "Running tests..." - @python -m pytest + @python -m pytest -m "not integration" + +.PHONY: integration-test +integration-test: + @echo "Running integration tests..." 
+ @python -m pytest -m "integration" .PHONY: run run: diff --git a/python/observation-publisher/publisher/__main__.py b/python/observation-publisher/publisher/__main__.py index b048ce147..89860f0d8 100644 --- a/python/observation-publisher/publisher/__main__.py +++ b/python/observation-publisher/publisher/__main__.py @@ -35,6 +35,7 @@ def start_consumer(cfg: PublisherConfig) -> None: prediction_log_consumer.start_polling( observation_sinks=observation_sinks, inference_schema=inference_schema, + model_version=cfg.environment.model_version, ) diff --git a/python/observation-publisher/publisher/metric.py b/python/observation-publisher/publisher/metric.py index 26d5314af..575c15028 100644 --- a/python/observation-publisher/publisher/metric.py +++ b/python/observation-publisher/publisher/metric.py @@ -1,5 +1,5 @@ from pandas import Timestamp -from prometheus_client import Gauge, Counter +from prometheus_client import Counter, Gauge class MetricWriter(object): @@ -21,6 +21,7 @@ def __init__(self): self.total_prediction_logs_processed_counter = Counter( "total_prediction_logs_processed", "The total number of prediction logs processed by the publisher", + ["model_id", "model_version"], ) self._initialized = True diff --git a/python/observation-publisher/publisher/observation_sink.py b/python/observation-publisher/publisher/observation_sink.py index f501c60e0..a09ddbb6c 100644 --- a/python/observation-publisher/publisher/observation_sink.py +++ b/python/observation-publisher/publisher/observation_sink.py @@ -1,6 +1,6 @@ import abc +import time from dataclasses import dataclass -from datetime import datetime, timedelta from typing import List, Tuple import pandas as pd @@ -10,24 +10,19 @@ from arize.utils.types import Environments from arize.utils.types import ModelTypes as ArizeModelType from dataclasses_json import dataclass_json +from google.api_core.exceptions import NotFound from google.cloud.bigquery import Client as BigQueryClient -from google.cloud.bigquery import ( 
- SchemaField, - Table, - TimePartitioning, - TimePartitioningType, -) -from merlin.observability.inference import ( - BinaryClassificationOutput, - InferenceSchema, - ObservationType, - RankingOutput, - RegressionOutput, - ValueType, -) +from google.cloud.bigquery import (SchemaField, Table, TimePartitioning, + TimePartitioningType) +from merlin.observability.inference import (BinaryClassificationOutput, + InferenceSchema, ObservationType, + RankingOutput, RegressionOutput, + ValueType) from publisher.config import ObservationSinkConfig, ObservationSinkType -from publisher.prediction_log_parser import PREDICTION_LOG_TIMESTAMP_COLUMN +from publisher.prediction_log_parser import (MODEL_VERSION_COLUMN, + PREDICTION_LOG_TIMESTAMP_COLUMN, + ROW_ID_COLUMN, SESSION_ID_COLUMN) class ObservationSink(abc.ABC): @@ -74,6 +69,12 @@ def __init__( model_version: str, arize_client: ArizeClient, ): + """ + :param inference_schema: Inference schema for the ingested model + :param model_id: Merlin model id + :param model_version: Merlin model version + :param arize_client: Arize Pandas Logger client + """ super().__init__(inference_schema, model_id, model_version) self._client = arize_client @@ -101,7 +102,7 @@ def _to_arize_schema(self) -> Tuple[ArizeModelType, ArizeSchema]: elif isinstance(prediction_output, RankingOutput): schema_attributes = self._common_arize_schema_attributes() | dict( rank_column_name=prediction_output.rank_column, - prediction_group_id_column_name=prediction_output.prediction_group_id_column, + prediction_group_id_column_name=SESSION_ID_COLUMN, ) model_type = ArizeModelType.RANKING else: @@ -112,6 +113,14 @@ def _to_arize_schema(self) -> Tuple[ArizeModelType, ArizeSchema]: return model_type, ArizeSchema(**schema_attributes) def write(self, df: pd.DataFrame): + df[self._inference_schema.prediction_id_column] = ( + df[SESSION_ID_COLUMN] + df[ROW_ID_COLUMN] + ) + if isinstance(self._inference_schema.model_prediction_output, RankingOutput): + df[ + 
self._inference_schema.model_prediction_output.prediction_group_id_column + ] = df[SESSION_ID_COLUMN] + processed_df = self._inference_schema.model_prediction_output.preprocess( df, [ObservationType.FEATURE, ObservationType.PREDICTION] ) @@ -134,17 +143,44 @@ def write(self, df: pd.DataFrame): raise e +@dataclass_json +@dataclass +class BigQueryRetryConfig: + """ + Configuration for retrying failed write attempts. Write could fail due to BigQuery + taking time to update the table schema / create new table. + Attributes: + enabled: Whether to retry failed write attempts + retry_attempts: Number of retry attempts + retry_interval_seconds: Interval between retry attempts + """ + + enabled: bool = False + retry_attempts: int = 4 + retry_interval_seconds: int = 30 + + @dataclass_json @dataclass class BigQueryConfig: + """ + Configuration for writing to BigQuery + Attributes: + project: GCP project id + dataset: BigQuery dataset name + ttl_days: Time to live for the date partition + retry: Configuration for retrying failed write attempts + """ + project: str dataset: str ttl_days: int + retry: BigQueryRetryConfig = BigQueryRetryConfig() class BigQuerySink(ObservationSink): """ - Writes prediction logs to BigQuery. If the destination table doesn't exist, it will be created based on the inference schema.. + Writes prediction logs to BigQuery. If the destination table doesn't exist, it will be created based on the inference schema. 
""" def __init__( @@ -152,21 +188,54 @@ def __init__( inference_schema: InferenceSchema, model_id: str, model_version: str, - project: str, - dataset: str, - ttl_days: int, + config: BigQueryConfig, ): + """ + :param inference_schema: Inference schema for the ingested model + :param model_id: Merlin model id + :param model_version: Merlin model version + :param config: Configuration to write to bigquery sink + """ super().__init__(inference_schema, model_id, model_version) self._client = BigQueryClient() self._inference_schema = inference_schema self._model_id = model_id self._model_version = model_version - self._project = project - self._dataset = dataset - table = Table(self.write_location, schema=self.schema_fields) - table.time_partitioning = TimePartitioning(type_=TimePartitioningType.DAY) - table.expires = datetime.now() + timedelta(days=ttl_days) - self._table: Table = self._client.create_table(exists_ok=True, table=table) + self._config = config + self._table = self.create_or_update_table() + + @property + def project(self) -> str: + return self._config.project + + @property + def dataset(self) -> str: + return self._config.dataset + + @property + def retry(self) -> BigQueryRetryConfig: + return self._config.retry + + def create_or_update_table(self) -> Table: + try: + original_table = self._client.get_table(self.write_location) + original_schema = original_table.schema + migrated_schema = original_schema[:] + for field in self.schema_fields: + if field not in original_schema: + migrated_schema.append(field) + if migrated_schema == original_schema: + return original_table + original_table.schema = migrated_schema + return self._client.update_table(original_table, ["schema"]) + except NotFound: + table = Table(self.write_location, schema=self.schema_fields) + table.time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=PREDICTION_LOG_TIMESTAMP_COLUMN, + expiration_ms=self._config.ttl_days * 24 * 60 * 60 * 1000, + ) + return 
self._client.create_table(table=table) @property def schema_fields(self) -> List[SchemaField]: @@ -179,13 +248,21 @@ def schema_fields(self) -> List[SchemaField]: schema_fields = [ SchemaField( - name=self._inference_schema.prediction_id_column, + name=SESSION_ID_COLUMN, + field_type="STRING", + ), + SchemaField( + name=ROW_ID_COLUMN, field_type="STRING", ), SchemaField( name=PREDICTION_LOG_TIMESTAMP_COLUMN, field_type="TIMESTAMP", ), + SchemaField( + name=MODEL_VERSION_COLUMN, + field_type="STRING", + ), ] for feature, feature_type in self._inference_schema.feature_types.items(): schema_fields.append( @@ -207,13 +284,35 @@ def schema_fields(self) -> List[SchemaField]: @property def write_location(self) -> str: - table_name = f"prediction_log_{self._model_id}_{self._model_version}".replace( - "-", "_" - ).replace(".", "_") - return f"{self._project}.{self._dataset}.{table_name}" + table_name = f"prediction_log_{self._model_id}".replace("-", "_").replace( + ".", "_" + ) + return f"{self.project}.{self.dataset}.{table_name}" def write(self, dataframe: pd.DataFrame): - self._client.insert_rows_from_dataframe(dataframe=dataframe, table=self._table) + for i in range(0, self.retry.retry_attempts + 1): + try: + response = self._client.insert_rows_from_dataframe( + dataframe=dataframe, table=self._table + ) + errors = [error for error_chunk in response for error in error_chunk] + if len(errors) > 0: + if not self.retry.enabled: + print("Errors when inserting rows to BigQuery") + return + else: + print( + f"Errors when inserting rows to BigQuery, retrying attempt {i}/{self.retry.retry_attempts}" + ) + time.sleep(self.retry.retry_interval_seconds) + else: + return + except NotFound as e: + print( + f"Table not found: {e}, retrying attempt {i}/{self.retry.retry_attempts}" + ) + time.sleep(self.retry.retry_interval_seconds) + print(f"Failed to write to BigQuery after {self.retry.retry_attempts} attempts") def new_observation_sink( @@ -230,9 +329,7 @@ def new_observation_sink( 
inference_schema=inference_schema, model_id=model_id, model_version=model_version, - project=bq_config.project, - dataset=bq_config.dataset, - ttl_days=bq_config.ttl_days, + config=bq_config, ) case ObservationSinkType.ARIZE: arize_config: ArizeConfig = ArizeConfig.from_dict(sink_config.config) # type: ignore[attr-defined] diff --git a/python/observation-publisher/publisher/prediction_log_consumer.py b/python/observation-publisher/publisher/prediction_log_consumer.py index 1d6199dde..eef099c53 100644 --- a/python/observation-publisher/publisher/prediction_log_consumer.py +++ b/python/observation-publisher/publisher/prediction_log_consumer.py @@ -14,11 +14,11 @@ from publisher.config import ObservationSource, ObservationSourceConfig from publisher.metric import MetricWriter from publisher.observation_sink import ObservationSink -from publisher.prediction_log_parser import ( - PREDICTION_LOG_TIMESTAMP_COLUMN, - PredictionLogFeatureTable, - PredictionLogResultsTable, -) +from publisher.prediction_log_parser import (MODEL_VERSION_COLUMN, + PREDICTION_LOG_TIMESTAMP_COLUMN, + ROW_ID_COLUMN, SESSION_ID_COLUMN, + PredictionLogFeatureTable, + PredictionLogResultsTable) class PredictionLogConsumer(abc.ABC): @@ -42,6 +42,7 @@ def start_polling( self, observation_sinks: List[ObservationSink], inference_schema: InferenceSchema, + model_version: str, ): try: buffered_logs = [] @@ -58,7 +59,9 @@ def start_polling( and buffered_duration < buffered_max_duration_seconds ): continue - df = log_batch_to_dataframe(buffered_logs, inference_schema) + df = log_batch_to_dataframe( + buffered_logs, inference_schema, model_version + ) most_recent_prediction_timestamp = df[ PREDICTION_LOG_TIMESTAMP_COLUMN ].max() @@ -69,7 +72,7 @@ def start_polling( len(buffered_logs) ) write_tasks = [ - Thread(target=sink.write, args=(df,)) for sink in observation_sinks + Thread(target=sink.write, args=(df.copy(),)) for sink in observation_sinks ] for task in write_tasks: task.start() @@ -160,7 +163,7 @@ def 
parse_message_to_prediction_log(msg: str) -> PredictionLog: def log_to_records( - log: PredictionLog, inference_schema: InferenceSchema + log: PredictionLog, inference_schema: InferenceSchema, model_version: str ) -> Tuple[List[List[np.int64 | np.float64 | np.bool_ | np.str_]], List[str]]: request_timestamp = log.request_timestamp.ToDatetime() feature_table = PredictionLogFeatureTable.from_struct( @@ -171,7 +174,9 @@ def log_to_records( ) rows = [ - feature_row + prediction_row + [log.prediction_id + row_id, request_timestamp] + feature_row + + prediction_row + + [log.prediction_id, row_id, request_timestamp, model_version] for feature_row, prediction_row, row_id in zip( feature_table.rows, prediction_results_table.rows, @@ -182,18 +187,23 @@ def log_to_records( column_names = ( feature_table.columns + prediction_results_table.columns - + [inference_schema.prediction_id_column, PREDICTION_LOG_TIMESTAMP_COLUMN] + + [ + SESSION_ID_COLUMN, + ROW_ID_COLUMN, + PREDICTION_LOG_TIMESTAMP_COLUMN, + MODEL_VERSION_COLUMN, + ] ) return rows, column_names def log_batch_to_dataframe( - logs: List[PredictionLog], inference_schema: InferenceSchema + logs: List[PredictionLog], inference_schema: InferenceSchema, model_version: str ) -> pd.DataFrame: combined_records = [] column_names: List[str] = [] for log in logs: - rows, column_names = log_to_records(log, inference_schema) + rows, column_names = log_to_records(log, inference_schema, model_version) combined_records.extend(rows) return pd.DataFrame.from_records(combined_records, columns=column_names) diff --git a/python/observation-publisher/publisher/prediction_log_parser.py b/python/observation-publisher/publisher/prediction_log_parser.py index 669da15d2..97ecd4d09 100644 --- a/python/observation-publisher/publisher/prediction_log_parser.py +++ b/python/observation-publisher/publisher/prediction_log_parser.py @@ -7,7 +7,10 @@ from merlin.observability.inference import InferenceSchema, ValueType from typing_extensions import Self 
+SESSION_ID_COLUMN = "session_id" +ROW_ID_COLUMN = "row_id" PREDICTION_LOG_TIMESTAMP_COLUMN = "request_timestamp" +MODEL_VERSION_COLUMN = "model_version" @dataclass diff --git a/python/observation-publisher/pyproject.toml b/python/observation-publisher/pyproject.toml index ac89db93c..b0b21c481 100644 --- a/python/observation-publisher/pyproject.toml +++ b/python/observation-publisher/pyproject.toml @@ -2,6 +2,9 @@ addopts = [ "--import-mode=importlib", ] +markers = [ + "integration: mark a test as integration test" +] [tool.mypy] exclude = "test.*" diff --git a/python/observation-publisher/requirements-dev.txt b/python/observation-publisher/requirements-dev.txt index d8df3e33f..1a610b802 100644 --- a/python/observation-publisher/requirements-dev.txt +++ b/python/observation-publisher/requirements-dev.txt @@ -4,4 +4,5 @@ types-requests==2.31.0.20231231 types-PyYAML==6.0.12.12 types-jmespath==1.0.2.7 mypy==1.7.1 -mypy-extensions==1.0.0 \ No newline at end of file +mypy-extensions==1.0.0 +db-dtypes==1.2.0 \ No newline at end of file diff --git a/python/observation-publisher/requirements.in b/python/observation-publisher/requirements.in index f42e3bd7f..d7080b2c4 100644 --- a/python/observation-publisher/requirements.in +++ b/python/observation-publisher/requirements.in @@ -1,6 +1,6 @@ confluent-kafka>=2.3.0 caraml-upi-protos>=1.0.0 -arize==7.7.* +arize>=7.7.0 hydra-core>=1.3.0 pandas>=1.0.0 google-cloud-bigquery diff --git a/python/observation-publisher/requirements.txt b/python/observation-publisher/requirements.txt index ebd0d6930..0890b3c2d 100644 --- a/python/observation-publisher/requirements.txt +++ b/python/observation-publisher/requirements.txt @@ -8,6 +8,8 @@ # via -r requirements.in alembic==1.13.0 # via mlflow +annotated-types==0.6.0 + # via pydantic antlr4-python3-runtime==4.9.3 # via # hydra-core @@ -74,7 +76,9 @@ flask==2.3.3 gitdb==4.0.11 # via gitpython gitpython==3.1.40 - # via mlflow + # via + # merlin-sdk + # mlflow google-api-core==2.15.0 # via # 
google-cloud-bigquery @@ -191,6 +195,10 @@ pyasn1==0.5.1 # rsa pyasn1-modules==0.3.0 # via google-auth +pydantic==2.5.3 + # via merlin-sdk +pydantic-core==2.14.6 + # via pydantic pygments==2.17.2 # via rich pyjwt==2.8.0 @@ -266,6 +274,8 @@ typing-extensions==4.9.0 # via # -r requirements.in # alembic + # pydantic + # pydantic-core # typing-inspect typing-inspect==0.9.0 # via dataclasses-json diff --git a/python/observation-publisher/tests/__init__.py b/python/observation-publisher/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/observation-publisher/tests/common_fixtures.py b/python/observation-publisher/tests/common_fixtures.py new file mode 100644 index 000000000..db018fb57 --- /dev/null +++ b/python/observation-publisher/tests/common_fixtures.py @@ -0,0 +1,13 @@ +import os + +import pytest + + +@pytest.fixture +def bq_project() -> str: + return os.environ.get("INTEGRATION_TEST_BQ_PROJECT") + + +@pytest.fixture +def bq_dataset() -> str: + return os.environ.get("INTEGRATION_TEST_BQ_DATASET") diff --git a/python/observation-publisher/tests/test_config.py b/python/observation-publisher/tests/test_config.py index 3a3ba60f9..199042aa2 100644 --- a/python/observation-publisher/tests/test_config.py +++ b/python/observation-publisher/tests/test_config.py @@ -4,14 +4,9 @@ from merlin.observability.inference import InferenceSchema, ValueType from omegaconf import OmegaConf -from publisher.config import ( - Environment, - ObservationSinkConfig, - ObservationSinkType, - ObservationSource, - ObservationSourceConfig, - PublisherConfig, -) +from publisher.config import (Environment, ObservationSinkConfig, + ObservationSinkType, ObservationSource, + ObservationSourceConfig, PublisherConfig) def test_config_initialization(): diff --git a/python/observation-publisher/tests/test_observation_sink.py b/python/observation-publisher/tests/test_observation_sink.py index de93b2a72..f12306a8e 100644 --- 
a/python/observation-publisher/tests/test_observation_sink.py +++ b/python/observation-publisher/tests/test_observation_sink.py @@ -1,18 +1,79 @@ +import dataclasses +import time from datetime import datetime from typing import Optional import pandas as pd import pyarrow as pa +import pytest from arize.pandas.logger import Client -from merlin.observability.inference import ( - BinaryClassificationOutput, - InferenceSchema, - RankingOutput, - ValueType, -) +from dateutil import tz +from google.cloud.bigquery import Client as BigQueryClient +from google.cloud.bigquery import SchemaField +from merlin.observability.inference import (BinaryClassificationOutput, + InferenceSchema, RankingOutput, + ValueType) +from pandas._testing import assert_frame_equal from requests import Response -from publisher.observation_sink import ArizeSink +from publisher.observation_sink import (ArizeSink, BigQueryConfig, + BigQueryRetryConfig, BigQuerySink) +from tests.common_fixtures import bq_dataset, bq_project + + +@pytest.fixture +def binary_classification_inference_schema() -> InferenceSchema: + return InferenceSchema( + feature_types={ + "rating": ValueType.FLOAT64, + }, + model_prediction_output=BinaryClassificationOutput( + prediction_score_column="prediction_score", + actual_label_column="actual_label", + positive_class_label="fraud", + negative_class_label="non fraud", + score_threshold=0.5, + ), + ) + + +@pytest.fixture +def binary_classification_inference_logs() -> pd.DataFrame: + request_timestamp = datetime(2024, 1, 1, 0, 0, 0).astimezone(tz.UTC) + return pd.DataFrame.from_records( + [ + [0.8, 0.4, "1234", "a", request_timestamp, "0.1.0", "non fraud"], + [0.5, 0.9, "1234", "b", request_timestamp, "0.1.0", "fraud"], + ], + columns=[ + "rating", + "prediction_score", + "session_id", + "row_id", + "request_timestamp", + "model_version", + "_prediction_label", + ], + ) + + +@pytest.fixture +def ranking_inference_logs() -> pd.DataFrame: + request_timestamp = datetime(2024, 1, 1, 0, 
0, 0).astimezone(tz.UTC) + return pd.DataFrame.from_records( + [ + [5.0, 1.0, "1234", "1001", request_timestamp], + [4.0, 0.9, "1234", "1002", request_timestamp], + [3.0, 0.8, "1234", "1003", request_timestamp], + ], + columns=[ + "rating", + "rank_score", + "session_id", + "row_id", + "request_timestamp", + ], + ) class MockResponse(Response): @@ -36,43 +97,24 @@ def _post_file( ) -def test_binary_classification_model_preprocessing_for_arize(): - inference_schema = InferenceSchema( - feature_types={ - "rating": ValueType.FLOAT64, - }, - model_prediction_output=BinaryClassificationOutput( - prediction_score_column="prediction_score", - actual_label_column="actual_label", - positive_class_label="fraud", - negative_class_label="non fraud", - score_threshold=0.5, - ), - ) +def test_binary_classification_model_preprocessing_for_arize( + binary_classification_inference_schema: InferenceSchema, + binary_classification_inference_logs: pd.DataFrame, +): arize_client = MockArizeClient(api_key="test", space_key="test") arize_sink = ArizeSink( - inference_schema, + binary_classification_inference_schema, "test-model", "0.1.0", arize_client, ) - request_timestamp = datetime.now() - input_df = pd.DataFrame.from_records( - [ - [0.8, 0.4, "1234a", request_timestamp], - [0.5, 0.9, "1234b", request_timestamp], - ], - columns=[ - "rating", - "prediction_score", - "prediction_id", - "request_timestamp", - ], - ) - arize_sink.write(input_df) + arize_sink.write(binary_classification_inference_logs) -def test_ranking_model_preprocessing_for_arize(): +def test_ranking_model_preprocessing_for_arize( + binary_classification_inference_logs: pd.DataFrame, + ranking_inference_logs: pd.DataFrame, +): inference_schema = InferenceSchema( feature_types={ "rating": ValueType.FLOAT64, @@ -83,21 +125,6 @@ def test_ranking_model_preprocessing_for_arize(): relevance_score_column="relevance_score_column", ), ) - request_timestamp = datetime.now() - input_df = pd.DataFrame.from_records( - [ - [5.0, 1.0, 
"1234", "1001", request_timestamp], - [4.0, 0.9, "1234", "1001", request_timestamp], - [3.0, 0.8, "1234", "1001", request_timestamp], - ], - columns=[ - "rating", - "rank_score", - "prediction_id", - "order_id", - "request_timestamp", - ], - ) arize_client = MockArizeClient(api_key="test", space_key="test") arize_sink = ArizeSink( inference_schema, @@ -105,4 +132,105 @@ def test_ranking_model_preprocessing_for_arize(): "0.1.0", arize_client, ) - arize_sink.write(input_df) + arize_sink.write(ranking_inference_logs) + + +@pytest.mark.integration +def test_bigquery_sink_schema_migration( + bq_project: str, + bq_dataset: str, + binary_classification_inference_schema: InferenceSchema, + binary_classification_inference_logs: pd.DataFrame, +): + client = BigQueryClient() + client.delete_table( + f"{bq_project}.{bq_dataset}.prediction_log_test_model", not_found_ok=True + ) + bq_sink = BigQuerySink( + binary_classification_inference_schema, + "test-model", + "0.1.0", + config=BigQueryConfig( + project=bq_project, + dataset=bq_dataset, + ttl_days=14, + retry=BigQueryRetryConfig( + enabled=True, retry_attempts=3, retry_interval_seconds=10 + ), + ), + ) + bq_sink.write(binary_classification_inference_logs) + migrated_schema = dataclasses.replace(binary_classification_inference_schema) + migrated_schema.feature_types = { + "rating_v2": ValueType.FLOAT64, + } + migrated_bq_sink = BigQuerySink( + migrated_schema, + "test-model", + "0.2.0", + config=BigQueryConfig( + project=bq_project, + dataset=bq_dataset, + ttl_days=14, + retry=BigQueryRetryConfig( + enabled=True, retry_attempts=5, retry_interval_seconds=30 + ), + ), + ) + migrated_inference_logs = binary_classification_inference_logs.rename( + columns={"rating": "rating_v2"} + ) + migrated_inference_logs["model_version"] = "0.2.0" + migrated_bq_sink.write(migrated_inference_logs) + version_update_bq_sink = BigQuerySink( + migrated_schema, + "test-model", + "0.3.0", + config=BigQueryConfig( + project=bq_project, + 
dataset=bq_dataset, + ttl_days=14, + ), + ) + version_update_inference_logs = migrated_inference_logs.copy() + version_update_inference_logs["model_version"] = "0.3.0" + version_update_bq_sink.write(version_update_inference_logs) + + table = client.get_table(f"{bq_project}.{bq_dataset}.prediction_log_test_model") + assert table.schema == [ + SchemaField(name="session_id", field_type="STRING"), + SchemaField(name="row_id", field_type="STRING"), + SchemaField(name="request_timestamp", field_type="TIMESTAMP"), + SchemaField(name="model_version", field_type="STRING"), + SchemaField(name="rating", field_type="FLOAT"), + SchemaField(name="prediction_score", field_type="FLOAT"), + SchemaField(name="_prediction_label", field_type="STRING"), + SchemaField(name="rating_v2", field_type="FLOAT"), + ] + df = client.query( + "SELECT * FROM `{}.{}.prediction_log_test_model`".format(bq_project, bq_dataset) + ).to_dataframe() + df.reset_index(drop=True, inplace=True) + event_timestamp = datetime(2024, 1, 1, 0, 0, 0).astimezone(tz.UTC) + expected_df = pd.DataFrame.from_records( + [ + [0.8, 0.4, "1234", "a", event_timestamp, "0.1.0", "non fraud", None], + [0.5, 0.9, "1234", "b", event_timestamp, "0.1.0", "fraud", None], + [None, 0.4, "1234", "a", event_timestamp, "0.2.0", "non fraud", 0.8], + [None, 0.9, "1234", "b", event_timestamp, "0.2.0", "fraud", 0.5], + [None, 0.4, "1234", "a", event_timestamp, "0.3.0", "non fraud", 0.8], + [None, 0.9, "1234", "b", event_timestamp, "0.3.0", "fraud", 0.5], + ], + columns=[ + "rating", + "prediction_score", + "session_id", + "row_id", + "request_timestamp", + "model_version", + "_prediction_label", + "rating_v2", + ], + ) + expected_df.reset_index(drop=True, inplace=True) + assert_frame_equal(df, expected_df, check_like=True) diff --git a/python/observation-publisher/tests/test_prediction_log_consumer.py b/python/observation-publisher/tests/test_prediction_log_consumer.py index db2a71569..c41f9b60d 100644 --- 
a/python/observation-publisher/tests/test_prediction_log_consumer.py +++ b/python/observation-publisher/tests/test_prediction_log_consumer.py @@ -4,11 +4,8 @@ import numpy as np import pandas as pd from caraml.upi.v1.prediction_log_pb2 import PredictionLog -from merlin.observability.inference import ( - BinaryClassificationOutput, - InferenceSchema, - ValueType, -) +from merlin.observability.inference import (BinaryClassificationOutput, + InferenceSchema, ValueType) from pandas._testing import assert_frame_equal from publisher.prediction_log_consumer import log_batch_to_dataframe @@ -77,6 +74,7 @@ def test_log_to_dataframe(): "service_type", ] output_columns = ["prediction_score"] + request_timestamp = datetime(2021, 1, 1, 0, 0, 0) prediction_logs = [ new_prediction_log( prediction_id="1234", @@ -92,7 +90,7 @@ def test_log_to_dataframe(): [0.9], [0.5], ], - request_timestamp=datetime(2021, 1, 1, 0, 0, 0), + request_timestamp=request_timestamp, row_ids=["a", "b"], ), new_prediction_log( @@ -109,25 +107,29 @@ def test_log_to_dataframe(): [0.4], [0.2], ], - request_timestamp=datetime(2021, 1, 1, 0, 0, 0), + request_timestamp=request_timestamp, row_ids=["c", "d"], ), ] - prediction_logs_df = log_batch_to_dataframe(prediction_logs, inference_schema) + prediction_logs_df = log_batch_to_dataframe( + prediction_logs, inference_schema, model_version + ) expected_df = pd.DataFrame.from_records( [ - [0.8, 24, "FOOD", 0.9, "1234a", datetime(2021, 1, 1, 0, 0, 0)], - [0.5, 2, "RIDE", 0.5, "1234b", datetime(2021, 1, 1, 0, 0, 0)], - [1.0, 13, "CAR", 0.4, "5678c", datetime(2021, 1, 1, 0, 0, 0)], - [0.4, 60, "RIDE", 0.2, "5678d", datetime(2021, 1, 1, 0, 0, 0)], + [0.8, 24, "FOOD", 0.9, "1234", "a", request_timestamp, model_version], + [0.5, 2, "RIDE", 0.5, "1234", "b", request_timestamp, model_version], + [1.0, 13, "CAR", 0.4, "5678", "c", request_timestamp, model_version], + [0.4, 60, "RIDE", 0.2, "5678", "d", request_timestamp, model_version], ], columns=[ "acceptance_rate", 
"minutes_since_last_order", "service_type", "prediction_score", - "prediction_id", + "session_id", + "row_id", "request_timestamp", + "model_version", ], ) assert_frame_equal(prediction_logs_df, expected_df) @@ -165,21 +167,27 @@ def test_empty_column_conversion_to_dataframe(): row_ids=["a"], ), ] - prediction_logs_df = log_batch_to_dataframe(prediction_logs, inference_schema) + prediction_logs_df = log_batch_to_dataframe( + prediction_logs, inference_schema, model_version + ) expected_df = pd.DataFrame.from_records( [ [ np.NaN, 0.5, - "1234a", + "1234", + "a", datetime(2021, 1, 1, 0, 0, 0), + "0.1.0", ], ], columns=[ "acceptance_rate", "prediction_score", - "prediction_id", + "session_id", + "row_id", "request_timestamp", + "model_version", ], ) assert_frame_equal(prediction_logs_df, expected_df)