From ebce098f83b114994160acff51fad7e6a4a64c07 Mon Sep 17 00:00:00 2001 From: Tio Pramayudi Date: Wed, 24 Jan 2024 17:31:35 +0700 Subject: [PATCH] Fix version patch and post --- api/api/versions_api.go | 1 + api/api/versions_api_test.go | 316 +++++++++++++++++++++++++++ api/models/version.go | 10 +- python/sdk/client/api_client.py | 2 + python/sdk/merlin/model.py | 4 + python/sdk/merlin/model_schema.py | 9 +- python/sdk/test/integration_test.py | 39 +++- python/sdk/test/model_schema_test.py | 9 +- 8 files changed, 380 insertions(+), 10 deletions(-) diff --git a/api/api/versions_api.go b/api/api/versions_api.go index 08e507930..0777312d0 100644 --- a/api/api/versions_api.go +++ b/api/api/versions_api.go @@ -137,6 +137,7 @@ func (c *VersionsController) CreateVersion(r *http.Request, vars map[string]stri ArtifactURI: run.Info.ArtifactURI, Labels: versionPost.Labels, PythonVersion: versionPost.PythonVersion, + ModelSchema: versionPost.ModelSchema, } version, _ = c.VersionsService.Save(ctx, version, c.FeatureToggleConfig.MonitoringConfig) diff --git a/api/api/versions_api_test.go b/api/api/versions_api_test.go index e8938f088..4e36f016f 100644 --- a/api/api/versions_api_test.go +++ b/api/api/versions_api_test.go @@ -708,6 +708,170 @@ func TestPatchVersion(t *testing.T) { data: Error{Message: "Error patching model version: Error creating secret: db is down"}, }, }, + { + desc: "Should success update model schema", + vars: map[string]string{ + "model_id": "1", + "version_id": "1", + }, + requestBody: &models.VersionPatch{ + Properties: &models.KV{ + "name": "model-1", + "created_by": "anonymous", + }, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, + versionService: func() *mocks.VersionsService { + svc := &mocks.VersionsService{} + svc.On("FindByID", mock.Anything, models.ID(1), models.ID(1), mock.Anything).Return( + &models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "http://mlflow.com", + }, + MlflowURL: "http://mlflow.com", + }, nil) + svc.On("Save", mock.Anything, &models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "http://mlflow.com", + }, + MlflowURL: "http://mlflow.com", + Properties: models.KV{ + "name": "model-1", + "created_by": "anonymous", + }, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, mock.Anything).Return(&models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "http://mlflow.com", + }, + MlflowURL: "http://mlflow.com", + Properties: models.KV{ + "name": "model-1", + "created_by": "anonymous", + }, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, nil) + return svc + }, + expected: &Response{ + code: http.StatusOK, + data: &models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "http://mlflow.com", + }, + MlflowURL: "http://mlflow.com", + Properties: models.KV{ + "name": "model-1", + "created_by": "anonymous", + }, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, + }, + }, } for _, tC := range testCases { t.Run(tC.desc, func(t *testing.T) { @@ -1155,6 +1319,158 @@ func TestCreateVersion(t *testing.T) { }, }, }, + { + desc: "Should successfully create version with model schema", + vars: map[string]string{ + "model_id": "1", + }, + body: models.VersionPost{ + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, + modelsService: func() *mocks.ModelsService { + svc := &mocks.ModelsService{} + svc.On("FindByID", mock.Anything, models.ID(1)).Return(&models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{ + MLFlowTrackingURL: "http://www.notinuse.com", + }, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "http://mlflow.com", + Endpoints: nil, + }, nil) + return svc + }, + mlflowClient: func() *mlfmocks.Client { + svc := &mlfmocks.Client{} + svc.On("CreateRun", "1").Return(&mlflow.Run{ + Info: mlflow.Info{ + RunID: "1", + ArtifactURI: "artifact/url/run", + }, + }, nil) + return svc + }, + versionService: func() *mocks.VersionsService { + svc := &mocks.VersionsService{} + svc.On("Save", mock.Anything, &models.Version{ + ModelID: models.ID(1), + RunID: "1", + ArtifactURI: "artifact/url/run", + PythonVersion: DEFAULT_PYTHON_VERSION, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, mock.Anything).Return(&models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "sklearn", + MlflowURL: "http://mlflow.com", + }, + MlflowURL: "http://mlflow.com", + PythonVersion: DEFAULT_PYTHON_VERSION, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, nil) + return svc + }, + expected: &Response{ + code: http.StatusCreated, + data: &models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "sklearn", + MlflowURL: "http://mlflow.com", + }, + MlflowURL: "http://mlflow.com", + PythonVersion: DEFAULT_PYTHON_VERSION, + ModelSchema: &models.ModelSchema{ + Spec: &models.SchemaSpec{ + PredictionIDColumn: "prediction_id", + ModelPredictionOutput: &models.ModelPredictionOutput{ + RankingOutput: &models.RankingOutput{ + PredictionGroudIDColumn: "session_id", + RankScoreColumn: "score", + RelevanceScoreColumn: "relevance_score", + OutputClass: models.Ranking, + }, + }, + FeatureTypes: map[string]models.ValueType{ + "featureA": models.Float64, + "featureB": models.Int64, + "featureC": models.Boolean, + }, + }, + ModelID: models.ID(1), + }, + }, + }, + }, } for _, tC := range testCases { t.Run(tC.desc, func(t *testing.T) { diff --git a/api/models/version.go b/api/models/version.go index 4336c74ed..1bba9ff33 100644 --- a/api/models/version.go +++ b/api/models/version.go @@ -40,13 +40,15 @@ type Version struct { } type VersionPost struct { - Labels KV `json:"labels" gorm:"labels"` - PythonVersion string `json:"python_version" gorm:"python_version"` + Labels KV `json:"labels" gorm:"labels"` + PythonVersion string `json:"python_version" gorm:"python_version"` + ModelSchema *ModelSchema `json:"model_schema"` } type VersionPatch struct { Properties *KV `json:"properties,omitempty"` CustomPredictor *CustomPredictor `json:"custom_predictor,omitempty"` + ModelSchema *ModelSchema `json:"model_schema"` } type CustomPredictor struct { @@ -100,6 +102,10 @@ func (v *Version) Patch(patch *VersionPatch) error { } v.CustomPredictor = patch.CustomPredictor } + if patch.ModelSchema != nil { + v.ModelSchema = patch.ModelSchema + } + return nil } diff --git a/python/sdk/client/api_client.py b/python/sdk/client/api_client.py index 5bf60ec80..27e801662 100644 --- a/python/sdk/client/api_client.py +++ b/python/sdk/client/api_client.py @@ -296,6 +296,7 @@ def response_deserialize( # if not found, look for '1XX', '2XX', etc. response_type = response_types_map.get(str(response_data.status)[0] + "XX", None) + print(f"response status ----- {response_data.status}") if not 200 <= response_data.status <= 299: if response_data.status == 400: raise BadRequestException(http_resp=response_data) @@ -328,6 +329,7 @@ def response_deserialize( match = re.search(r"charset=([a-zA-Z\-\d]+)[\s;]?", content_type) encoding = match.group(1) if match else "utf-8" response_text = response_data.data.decode(encoding) + print(f"response_text ------ {response_text}") return_data = self.deserialize(response_text, response_type) return ApiResponse( diff --git a/python/sdk/merlin/model.py b/python/sdk/merlin/model.py index 7a9924743..a3c6d3c09 100644 --- a/python/sdk/merlin/model.py +++ b/python/sdk/merlin/model.py @@ -783,6 +783,10 @@ def url(self) -> str: model_id = self.model.id base_url = guess_mlp_ui_url(self.model.project.url) return f"{base_url}/projects/{project_id}/models/{model_id}/versions" + + @property + def model_schema(self) -> Optional[ModelSchema]: + return self._model_schema def start(self): """ diff --git a/python/sdk/merlin/model_schema.py b/python/sdk/merlin/model_schema.py index 376d18c2b..d5a9fbf86 100644 --- a/python/sdk/merlin/model_schema.py +++ b/python/sdk/merlin/model_schema.py @@ -81,18 +81,21 @@ def _to_client_prediction_output_spec(self) -> client.ModelPredictionOutput: actual_label_column=prediction_output.actual_label_column, positive_class_label=prediction_output.positive_class_label, negative_class_label=prediction_output.negative_class_label, - score_threshold=prediction_output.score_threshold + score_threshold=prediction_output.score_threshold, + output_class=client.ModelPredictionOutputClass(BinaryClassificationOutput.__name__) )) elif isinstance(prediction_output, RegressionOutput): return client.ModelPredictionOutput(client.RegressionOutput( actual_score_column=prediction_output.actual_score_column, - prediction_score_column=prediction_output.prediction_score_column + prediction_score_column=prediction_output.prediction_score_column, + output_class=client.ModelPredictionOutputClass(RegressionOutput.__name__) )) elif isinstance(prediction_output, RankingOutput): return client.ModelPredictionOutput(client.RankingOutput( relevance_score_column=prediction_output.relevance_score_column, prediction_group_id_column=prediction_output.prediction_group_id_column, - rank_score_column=prediction_output.rank_score_column + rank_score_column=prediction_output.rank_score_column, + output_class=client.ModelPredictionOutputClass(RankingOutput.__name__) )) raise ValueError("model prediction output is not recognized") diff --git a/python/sdk/test/integration_test.py b/python/sdk/test/integration_test.py index 44206c2cd..bac19c8f3 100644 --- a/python/sdk/test/integration_test.py +++ b/python/sdk/test/integration_test.py @@ -128,7 +128,37 @@ def test_xgboost( undeploy_all_version() - with merlin.new_model_version(model_schema=ModelSchema(spec=InferenceSchema( + with merlin.new_model_version() as v: + # Upload the serialized model to MLP + merlin.log_model(model_dir=model_dir) + + endpoint = merlin.deploy(v, deployment_mode=deployment_mode) + resp = requests.post(f"{endpoint.url}", json=request_json) + + assert resp.status_code == 200 + assert resp.json() is not None + assert len(resp.json()["predictions"]) == len(request_json["instances"]) + + merlin.undeploy(v) + +@pytest.mark.integration +@pytest.mark.dependency() +@pytest.mark.parametrize( + "deployment_mode", [DeploymentMode.RAW_DEPLOYMENT, DeploymentMode.SERVERLESS] +) +def test_model_schema( + integration_test_url, project_name, deployment_mode, use_google_oauth, requests +): + merlin.set_url(integration_test_url, use_google_oauth=use_google_oauth) + merlin.set_project(project_name) + merlin.set_model( + f"model-schema-{deployment_mode_suffix(deployment_mode)}", ModelType.XGBOOST + ) + + model_dir = "test/xgboost-model" + + undeploy_all_version() + model_schema = ModelSchema(spec=InferenceSchema( feature_types={ "featureA": ValueType.FLOAT64, "featureB": ValueType.INT64, @@ -143,10 +173,14 @@ def test_xgboost( negative_class_label="non_complete", score_threshold=0.7 ) - ))) as v: + )) + + with merlin.new_model_version(model_schema=model_schema) as v: # Upload the serialized model to MLP merlin.log_model(model_dir=model_dir) + assert v.model_schema == model_schema + endpoint = merlin.deploy(v, deployment_mode=deployment_mode) resp = requests.post(f"{endpoint.url}", json=request_json) @@ -157,6 +191,7 @@ def test_xgboost( merlin.undeploy(v) + @pytest.mark.integration def test_mlflow_tracking( integration_test_url, project_name, use_google_oauth, requests diff --git a/python/sdk/test/model_schema_test.py b/python/sdk/test/model_schema_test.py index 47345d484..500745e62 100644 --- a/python/sdk/test/model_schema_test.py +++ b/python/sdk/test/model_schema_test.py @@ -26,7 +26,8 @@ actual_label_column="actual_label", positive_class_label="positive", negative_class_label="negative", - score_threshold=0.5 + score_threshold=0.5, + output_class=client.ModelPredictionOutputClass.BINARYCLASSIFICATIONOUTPUT ) ) ) @@ -70,7 +71,8 @@ model_prediction_output=client.ModelPredictionOutput( client.RegressionOutput( prediction_score_column="prediction_score", - actual_score_column="actual_score" + actual_score_column="actual_score", + output_class=client.ModelPredictionOutputClass.REGRESSIONOUTPUT ) ) ) @@ -112,7 +114,8 @@ client.RankingOutput( rank_score_column="score", prediction_group_id_column="session_id", - relevance_score_column="relevance_score" + relevance_score_column="relevance_score", + output_class=client.ModelPredictionOutputClass.RANKINGOUTPUT ) ) )