From e16e67c3b964d956204b564430bf69c02dfb18db Mon Sep 17 00:00:00 2001 From: Tio Pramayudi Date: Thu, 1 Feb 2024 10:01:32 +0700 Subject: [PATCH] Add documentation for model schema and model observability --- docs/diagrams/model_observability.drawio.svg | 4 + docs/user/templates/09_model_observability.md | 3 - docs/user/templates/09_model_schema.md | 90 +++++++++++++++++ docs/user/templates/10_model_observability.md | 99 +++++++++++++++++++ python/sdk/merlin/autoscaling.py | 5 +- python/sdk/merlin/logger.py | 95 +++++++++++++----- python/sdk/merlin/merlin.py | 9 +- python/sdk/merlin/observability/inference.py | 8 +- python/sdk/merlin/validation.py | 21 ++-- python/sdk/pyfunc.Dockerfile | 36 ------- 10 files changed, 286 insertions(+), 84 deletions(-) create mode 100644 docs/diagrams/model_observability.drawio.svg delete mode 100644 docs/user/templates/09_model_observability.md create mode 100644 docs/user/templates/09_model_schema.md create mode 100644 docs/user/templates/10_model_observability.md delete mode 100644 python/sdk/pyfunc.Dockerfile diff --git a/docs/diagrams/model_observability.drawio.svg b/docs/diagrams/model_observability.drawio.svg new file mode 100644 index 000000000..ee7dfd39d --- /dev/null +++ b/docs/diagrams/model_observability.drawio.svg @@ -0,0 +1,4 @@ + + + +
Merlin Model
Merlin Model
Consumer
Consumer
ML Observability System
ML Observability Sys...
Model Deployment Workflow
Model Deployment Wor...
....
....
Train Model
Train Model
Publish Training Dataset
Publish Training...
Deploy Model
Deploy Model
....
....
Ground Truth Collector Workflow
Ground Truth Collector Wo...
....
....
Fetch Ground Truth
Fetch Ground Tr...
Publish Ground Truth
Publish Ground...
Prediction
Data BQ
Prediction...
\ No newline at end of file diff --git a/docs/user/templates/09_model_observability.md b/docs/user/templates/09_model_observability.md deleted file mode 100644 index d224fcffa..000000000 --- a/docs/user/templates/09_model_observability.md +++ /dev/null @@ -1,3 +0,0 @@ - -# Model Observability -Model observability enable model's owner to observe and analyze their model in production by look at the performance and drift metrics. \ No newline at end of file diff --git a/docs/user/templates/09_model_schema.md b/docs/user/templates/09_model_schema.md new file mode 100644 index 000000000..c32dfde9d --- /dev/null +++ b/docs/user/templates/09_model_schema.md @@ -0,0 +1,90 @@ + + +# Model Schema + +Model schema is a specification of input and output of a model, such as what are the features columns, prediction columns and also ground truth columns. Following are the fields in model schema: + +| Field | Type | Description | Mandatory | +|-------|------|-------------|-----------| +| `id` | int | Unique identifier for each model schema | Not mandatory, if ID is not specified it will create new model schema otherwise it will update the model schema with corresponding ID | +| `model_id`| int | Model ID that correlate with the schema | Not mandatory, if not specified the SDK will assign it with the model that user set | +| `spec` | InferenceSchema | Detail specification for model schema | True | + +Detail specification is defined by using `InferenceSchema` class, following are the fields: +| Field | Type | Description | Mandatory | +|-------|------|-------------|-----------| +| `feature_types` | Dict[str, ValueType] | Mapping between feature name with the type of the feature | True | +| `model_prediction_output` | PredictionOutput | Prediction specification that differ between model types, e.g BinaryClassificationOutput, RegressionOutput, RankingOutput | True | +| `prediction_id_column` | str | The column name that contains prediction id value | True | +| `tag_columns` | 
Optional[List[str]] | List of column names that contain additional information about prediction, you can treat it as metadata | False | + +From above we can see `model_prediction_output` field that has type `PredictionOutput`, this field is a specification of prediction that is generated by the model depending on its model type. Currently we support 3 model types in the schema: +* Binary Classification +* Regression +* Ranking + +Each model type has its own model prediction output specification. + +## Binary Classification +Model prediction output specification for Binary Classification type is `BinaryClassificationOutput` that has following fields: + +| Field | Type | Description | Mandatory | +|-------|------|-------------|-----------| +| `prediction_score_column` | str | Column that contains prediction score value of a model. Prediction score must be between 0.0 and 1.0 | True | +| `actual_label_column` | str | Name of the column containing the actual class | False, because not all models have the ground truth | +| `positive_class_label` | str | Label for positive class | True | +| `negative_class_label` | str | Label for negative class | True | +| `score_threshold` | float | Score threshold for prediction to be considered as positive class | False, if not specified it will use 0.5 as default | + +## Regression +Model prediction output specification for Regression type is `RegressionOutput` that has following fields: + +| Field | Type | Description | Mandatory | +|-------|------|-------------|-----------| +| `prediction_score_column` | str | Column that contains prediction score value of a model | True | +| `actual_score_column` | str | Name of the column containing the actual score | False, because not all models have the ground truth | + + +## Ranking +Model prediction output specification for Ranking type is `RankingOutput` that has following fields: + +| Field | Type | Description | Mandatory | +|-------|------|-------------|-----------| +| 
`rank_score_column` | str | Name of the column containing the ranking score of the prediction | True | +| `prediction_group_id_column` | str | Name of the column containing the prediction group id | True | +| `relevance_score_column` | str | Name of the column containing the relevance score of the prediction | True | + +## Define model schema +From the specification above, users can create the schema for their model. Suppose that users have a binary classification model that has 4 features +* featureA that has float type +* featureB that has int type +* featureC that has string type +* featureD that has boolean type + +With positive class `complete` and negative class `non_complete` and the threshold for positive class is 0.75. Actual label is stored under column `target`, `prediction_score` under column `score`, and `prediction_id` under column `prediction_id`. From that specification, users can define the model schema and put it alongside version creation. Below is the example code snippet + +```python +from merlin.model_schema import ModelSchema +from merlin.observability.inference import InferenceSchema, ValueType, BinaryClassificationOutput + model_schema = ModelSchema(spec=InferenceSchema( + feature_types={ + "featureA": ValueType.FLOAT64, + "featureB": ValueType.INT64, + "featureC": ValueType.STRING, + "featureD": ValueType.BOOLEAN + }, + prediction_id_column="prediction_id", + model_prediction_output=BinaryClassificationOutput( + prediction_score_column="score", + actual_label_column="target", + positive_class_label="complete", + negative_class_label="non_complete", + score_threshold=0.75 + ) + )) +with merlin.new_model_version(model_schema=model_schema) as v: + .... + +``` + +The above code snippet defines the model schema and attaches it to a specific model version, because the schema can differ between versions. 
\ No newline at end of file diff --git a/docs/user/templates/10_model_observability.md b/docs/user/templates/10_model_observability.md new file mode 100644 index 000000000..cd6c9ca01 --- /dev/null +++ b/docs/user/templates/10_model_observability.md @@ -0,0 +1,99 @@ + +# Model Observability +Model observability enables model owners to observe and analyze their model in production by looking at the performance and drift metrics. Performance indicates how well your model predicts compared to the actual output, and drift indicates the difference in distribution between two datasets. To calculate those metrics the model observability system needs the following data: +* Features data. The features data that is supplied to the model to do prediction +* Prediction data. The prediction as output of your model +* Ground truth / Actual data. The actual value of the thing that your model tries to predict + +Those data can be collected from the training phase and the serving phase (production). Data that is collected in the training phase is used as the baseline dataset; we can refer to it as the training dataset. Data collected during the serving phase is referred to as the production dataset; this data must be emitted by the model. By default the merlin model does not emit any of those data, hence model observability is not enabled by default. However, merlin provides a way for a model to emit such data, but currently it is limited to PyFunc models. The way is to turn on the `ENABLE_MODEL_OBSERVABILITY` flag and modify the PyFunc model to return model input (features) and model output (prediction output); more detail is explained in the `Onboarding` section. + +## Architecture + +![architecture](../../diagrams/model_observability.drawio.svg) + +From the architecture diagram above, we can see that there are three places where the data is published to the model observability system +* Model deployment workflow. Especially after model training step is completed. 
This step is publishing training dataset as baseline dataset +* Model serving. PyFunc model will emit features and predictions data to a topic in a kafka cluster, and a separate kafka consumer consumes the corresponding topic and publishes the data to model observability system. Kafka consumer also stores the data into a separate BQ table that later will be joined with the user ground truth BQ table. +* Ground truth collector workflow. This workflow's primary objective is to publish ground truth or actual for a prediction + +## Onboarding +As the architecture diagram illustrates, the end-to-end onboarding of a model to model observability involves several components. The scope of this section is limited to merlin model modification. {{ workflow_scope_explaination }} + +### PyFunc modification +Currently the only supported model for model observability is PyFunc model, the model should implement class `PyFuncV3Model` instead of `PyFuncModel`. This `PyFuncV3Model` has different method signatures that must be implemented. Following are the new methods: +| Method Name | Description | +|-------------|-------------| +| `preprocess(self, request: dict, **kwargs) -> ModelInput` | Does preprocessing that returns all the required features for prediction. Must be implemented if using `HTTP_JSON` protocol | +| `postprocess(self, model_output: ModelOutput, request: dict) -> dict` | Postprocessing basically does additional processing to construct the end result of the overall model. Must be implemented if using `HTTP_JSON` protocol | +| `upiv1_preprocess(self, request: upi_pb2.PredictValuesRequest, context: grpc.ServicerContext) -> ModelInput` | Preprocess method signature that is only called when using `UPI_V1` protocol. 
Must be implemented if using `UPI_V1` protocol | +| `upiv1_postprocess(self, model_output: ModelOutput, request: upi_pb2.PredictValuesRequest) -> upi_pb2.PredictValuesResponse` | Postprocess method signature that is only called when using `UPI_V1` protocol. 
Must be implemented if using `UPI_V1` protocol | + +Besides the changes in signature, you can see some of those methods return new types, `ModelInput` and `ModelOutput`. `ModelInput` is a class that represents input information of the models, this class contains following fields: +| Field | Type | Description| +|-------|------|------------| +| `prediction_ids` | List[str] | Unique identifier for each prediction | +| `features` | Union[Values, pandas.DataFrame] | Features value that is used by the model to generate prediction. Length of features should be the same as `prediction_ids` | +| `entities` | Optional[Union[Values, pandas.DataFrame]] | Additional data that are not used for prediction, but this data is used to retrieve other features, e.g `driver_id`, we can retrieve features associated with certain `driver_id`| +| `session_id` | str | Identifier for the request. This value will be used together with `prediction_ids` as prediction identifier in model observability system | + +`ModelInput` data is essential for model observability since it contains features values and identifier of prediction. Features values are used to calculate feature drift, and identifier is used as join key between features, prediction data with ground truth data. On the other hand, `ModelOutput` is the class that represents raw model prediction output, not the final output of PyFunc model. 
`ModelOutput` class contains following fields: +| Field | Type | Description | +|-------|------|-------------| +| `predictions` | Values | `predictions` contains prediction output from ml_predict, it may contain multiple columns e.g for multiclass classification or for binary classification that contains prediction score and label | +| `prediction_ids` | List[str] | Unique identifier for each prediction output | + +Like `ModelInput`, `ModelOutput` is also essential for model observability, it can be used to calculate prediction drift but more importantly it can calculate performance metrics. 
+ +### Configure Model Schema + +Model schema is essential for model observability because it is used by the kafka consumer to choose which columns are relevant to model observability and do necessary preprocessing before publishing the data to model observability system. Users can see more detail of configuring model schema [here](../templates/09_model_schema.md) + +### Deployment +There is not much change on the deployment part, users just need to set `enable_model_observability` parameter to `True` during model deploy. For clarity, we take one use case for model observability example, suppose a model has 4 features: +* featureA that has float type +* featureB that has int type +* featureC that has string type +* featureD that has boolean type + +The model type is ranking, with the prediction group id located in the `session_id` column, the prediction id in the `prediction_id` column, the rank score in the `score` column and the relevance score in the `relevance_score` column. Below is a snippet of the Python code + +```python +class ModelObservabilityModel(PyFuncV3Model): + + def preprocess(self, request: dict, **kwargs) -> ModelInput: + return ModelInput( + session_id="session_id", + prediction_ids=["prediction_1", "prediction_2"], + features=pd.DataFrame([[0.7, 200, "ID", True], [0.99, 250, "SG", False]], columns=["featureA", "featureB", "featureC", "featureD"]), + ) + + def infer(self, model_input: ModelInput) -> ModelOutput: + return ModelOutput( + prediction_ids=model_input.prediction_ids, + predictions=Values(columns=["score"], data=[[0.5], [0.9]]), + ) + def postprocess(self, model_output: ModelOutput, request: dict) -> dict: + return {"predictions": model_output.predictions.data} + + +model_schema = ModelSchema(spec=InferenceSchema( + feature_types={ + "featureA": ValueType.FLOAT64, + "featureB": ValueType.INT64, + "featureC": ValueType.STRING, + "featureD": ValueType.BOOLEAN + }, + prediction_id_column="prediction_id", + model_prediction_output=RankingOutput( + 
rank_score_column="score", + prediction_group_id_column="session_id", + relevance_score_column="relevance_score" + ) + )) +with merlin.new_model_version(model_schema=model_schema) as v: + v.log_pyfunc_model(model_instance=ModelObservabilityModel(), + conda_env="env.yaml", + code_dir=["src"], + artifacts={"model": ARTIFACT_PATH}) +endpoint = merlin.deploy(v, enable_model_observability=True) +``` diff --git a/python/sdk/merlin/autoscaling.py b/python/sdk/merlin/autoscaling.py index bf6b23141..710da5f24 100644 --- a/python/sdk/merlin/autoscaling.py +++ b/python/sdk/merlin/autoscaling.py @@ -10,6 +10,7 @@ class MetricsType(Enum): MEMORY_UTILIZATION: percentage of Memory utilization. RPS: throughput in request per second. """ + CONCURRENCY = "concurrency" CPU_UTILIZATION = "cpu_utilization" MEMORY_UTILIZATION = "memory_utilization" @@ -43,5 +44,7 @@ def target_value(self) -> float: return self._target_value -RAW_DEPLOYMENT_DEFAULT_AUTOSCALING_POLICY = AutoscalingPolicy(MetricsType.CPU_UTILIZATION, 50) +RAW_DEPLOYMENT_DEFAULT_AUTOSCALING_POLICY = AutoscalingPolicy( + MetricsType.CPU_UTILIZATION, 50 +) SERVERLESS_DEFAULT_AUTOSCALING_POLICY = AutoscalingPolicy(MetricsType.CONCURRENCY, 1) diff --git a/python/sdk/merlin/logger.py b/python/sdk/merlin/logger.py index 265810e05..2bf88e95a 100644 --- a/python/sdk/merlin/logger.py +++ b/python/sdk/merlin/logger.py @@ -13,14 +13,16 @@ # limitations under the License. 
from enum import Enum +from typing import Optional + import client from merlin.util import autostr -from typing import Optional + class LoggerMode(Enum): - ALL = 'all' - REQUEST = 'request' - RESPONSE = 'response' + ALL = "all" + REQUEST = "request" + RESPONSE = "response" @autostr @@ -36,10 +38,13 @@ def enabled(self): @property def mode(self): return self._mode - + + @autostr class PredictionLoggerConfig: - def __init__(self, enabled: bool, raw_features_table: str, entities_table: str) -> None: + def __init__( + self, enabled: bool, raw_features_table: str, entities_table: str + ) -> None: self._enabled = enabled self._raw_features_table = raw_features_table self._entities_table = entities_table @@ -47,11 +52,11 @@ def __init__(self, enabled: bool, raw_features_table: str, entities_table: str) @property def enabled(self): return self._enabled - + @property def raw_features_table(self): return self._raw_features_table - + @property def entities_table(self): return self._entities_table @@ -62,7 +67,7 @@ class Logger: logger_mode_mapping = { LoggerMode.ALL: client.LoggerMode.ALL, LoggerMode.REQUEST: client.LoggerMode.REQUEST, - LoggerMode.RESPONSE: client.LoggerMode.RESPONSE + LoggerMode.RESPONSE: client.LoggerMode.RESPONSE, } logger_mode_mapping_rev = { @@ -71,7 +76,12 @@ class Logger: client.LoggerMode.RESPONSE: LoggerMode.RESPONSE, } - def __init__(self, model: LoggerConfig = None, transformer: LoggerConfig = None, prediction: PredictionLoggerConfig = None): + def __init__( + self, + model: LoggerConfig = None, + transformer: LoggerConfig = None, + prediction: PredictionLoggerConfig = None, + ): self._model = model self._transformer = transformer self._prediction = prediction @@ -82,21 +92,40 @@ def from_logger_response(cls, response: Optional[client.Logger]): return Logger() model_config = None if response.model is not None: - model_config = LoggerConfig(enabled=response.model.enabled, mode=cls._get_logger_mode_from_api_response(response.model.mode)) + 
model_config = LoggerConfig( + enabled=response.model.enabled, + mode=cls._get_logger_mode_from_api_response(response.model.mode), + ) transformer_config = None if response.transformer is not None: - transformer_config = LoggerConfig(enabled=response.transformer.enabled, - mode=cls._get_logger_mode_from_api_response(response.transformer.mode)) + transformer_config = LoggerConfig( + enabled=response.transformer.enabled, + mode=cls._get_logger_mode_from_api_response(response.transformer.mode), + ) prediction_config = None prediction_logger = response.prediction if prediction_logger is not None: - raw_features_table = prediction_logger.raw_features_table if prediction_logger.raw_features_table is not None else "" - entities_table = prediction_logger.entities_table if prediction_logger.entities_table is not None else "" - prediction_config = PredictionLoggerConfig(enabled=prediction_logger.enabled, - raw_features_table=raw_features_table, - entities_table=entities_table) + raw_features_table = ( + prediction_logger.raw_features_table + if prediction_logger.raw_features_table is not None + else "" + ) + entities_table = ( + prediction_logger.entities_table + if prediction_logger.entities_table is not None + else "" + ) + prediction_config = PredictionLoggerConfig( + enabled=prediction_logger.enabled, + raw_features_table=raw_features_table, + entities_table=entities_table, + ) - return Logger(model=model_config, transformer=transformer_config, prediction=prediction_config) + return Logger( + model=model_config, + transformer=transformer_config, + prediction=prediction_config, + ) @classmethod def _get_logger_mode_from_api_response(cls, mode_from_api_response): @@ -105,32 +134,44 @@ def _get_logger_mode_from_api_response(cls, mode_from_api_response): mode = LoggerMode.ALL return mode - def to_logger_spec(self) -> Optional[client.Logger]: target_logger = None model_logger_config = None if self.model is not None: model_logger_config = client.LoggerConfig( - 
enabled=self.model.enabled, mode=Logger.logger_mode_mapping[self.model.mode]) + enabled=self.model.enabled, + mode=Logger.logger_mode_mapping[self.model.mode], + ) transformer_logger_config = None if self.transformer is not None: transformer_logger_config = client.LoggerConfig( - enabled=self.transformer.enabled, mode=Logger.logger_mode_mapping[self.transformer.mode]) + enabled=self.transformer.enabled, + mode=Logger.logger_mode_mapping[self.transformer.mode], + ) prediction_logger_config = None if self.prediction is not None: prediction_logger_config = client.PredictionLoggerConfig( - enabled=self.prediction.enabled, raw_features_table= self.prediction.raw_features_table, entities_table=self.prediction.entities_table + enabled=self.prediction.enabled, + raw_features_table=self.prediction.raw_features_table, + entities_table=self.prediction.entities_table, ) - if model_logger_config is not None or transformer_logger_config is not None or prediction_logger_config is not None: - target_logger = client.Logger(model=model_logger_config, transformer=transformer_logger_config, prediction=prediction_logger_config) + if ( + model_logger_config is not None + or transformer_logger_config is not None + or prediction_logger_config is not None + ): + target_logger = client.Logger( + model=model_logger_config, + transformer=transformer_logger_config, + prediction=prediction_logger_config, + ) return target_logger - @property def model(self): return self._model @@ -138,7 +179,7 @@ def model(self): @property def transformer(self): return self._transformer - + @property def prediction(self): return self._prediction diff --git a/python/sdk/merlin/merlin.py b/python/sdk/merlin/merlin.py index 42d87934c..5ba69cf4c 100644 --- a/python/sdk/merlin/merlin.py +++ b/python/sdk/merlin/merlin.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import click import warnings -import os -import merlin -from merlin.model import ModelType + +import click from cookiecutter.main import cookiecutter +from merlin.model import ModelType from merlin.util import valid_name_check +import merlin + warnings.filterwarnings("ignore") diff --git a/python/sdk/merlin/observability/inference.py b/python/sdk/merlin/observability/inference.py index 7660b7ed5..3a51a281d 100644 --- a/python/sdk/merlin/observability/inference.py +++ b/python/sdk/merlin/observability/inference.py @@ -221,9 +221,11 @@ def preprocess( self, df: pd.DataFrame, observation_types: List[ObservationType] ) -> pd.DataFrame: if ObservationType.PREDICTION in observation_types: - df[self.rank_column] = df.groupby(self.prediction_group_id_column)[ - self.rank_score_column - ].rank(method="first", ascending=False).astype(np.int_) + df[self.rank_column] = ( + df.groupby(self.prediction_group_id_column)[self.rank_score_column] + .rank(method="first", ascending=False) + .astype(np.int_) + ) return df def prediction_types(self) -> Dict[str, ValueType]: diff --git a/python/sdk/merlin/validation.py b/python/sdk/merlin/validation.py index 4e0359acb..a3df880be 100644 --- a/python/sdk/merlin/validation.py +++ b/python/sdk/merlin/validation.py @@ -21,7 +21,7 @@ def validate_model_dir(model_type, model_dir): Validates user-provided model directory based on file structure. For tensorflow models, checking is only done on the subdirectory with the largest version number. 
- + :param model_type: type of given model :param model_dir: directory containing serialised model file """ @@ -30,10 +30,12 @@ def validate_model_dir(model_type, model_dir): if not isdir(model_dir): raise ValueError(f"{model_dir} is not a directory") - if model_type == ModelType.PYFUNC or \ - model_type == ModelType.PYFUNC_V2 or \ - model_type == ModelType.PYFUNC_V3 or \ - model_type == ModelType.CUSTOM: + if ( + model_type == ModelType.PYFUNC + or model_type == ModelType.PYFUNC_V2 + or model_type == ModelType.PYFUNC_V3 + or model_type == ModelType.CUSTOM + ): return if model_type == ModelType.TENSORFLOW: @@ -67,17 +69,16 @@ def validate_model_dir(model_type, model_dir): raise ValueError(f"{config_path} is not found") model_store_dir = join(model_dir, "model-store") - if not any(fname.endswith('.mar') for fname in listdir(model_store_dir)): + if not any(fname.endswith(".mar") for fname in listdir(model_store_dir)): raise ValueError(f".mar file is not found in {model_store_dir}") return model_file_map = { - ModelType.XGBOOST: ['model.bst'], - ModelType.SKLEARN: ['model.joblib'], - ModelType.ONNX: ['model.onnx'] + ModelType.XGBOOST: ["model.bst"], + ModelType.SKLEARN: ["model.joblib"], + ModelType.ONNX: ["model.onnx"], } files = listdir(model_dir) if not all([file in files for file in model_file_map[model_type]]): raise ValueError(f"{model_file_map[model_type]} is not found in {model_dir}") - diff --git a/python/sdk/pyfunc.Dockerfile b/python/sdk/pyfunc.Dockerfile deleted file mode 100644 index ae49e30fa..000000000 --- a/python/sdk/pyfunc.Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2020 The Merlin Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ARG BASE_IMAGE=ghcr.io/caraml-dev/merlin/merlin-pyfunc-base:0.38.1 -FROM ${BASE_IMAGE} - -# Download and install user model dependencies -ARG MODEL_DEPENDENCIES_URL -COPY ${MODEL_DEPENDENCIES_URL} conda.yaml -RUN conda env create --name merlin-model --file conda.yaml - -# Copy and install pyfunc-server and merlin-sdk dependencies -COPY merlin/python/pyfunc-server /pyfunc-server -COPY merlin/python/sdk /sdk -ENV SDK_PATH=/sdk - -WORKDIR /pyfunc-server -RUN /bin/bash -c ". activate merlin-model && pip uninstall -y merlin-sdk && pip install -r /pyfunc-server/requirements.txt" - -# Download and dry-run user model artifacts and code -ARG MODEL_ARTIFACTS_URL -COPY ${MODEL_ARTIFACTS_URL} model -RUN /bin/bash -c ". activate merlin-model && python -m pyfuncserver --model_dir model --dry_run" - -CMD ["/bin/bash", "/pyfunc-server/run.sh"]