Add documentation for model observability and model schema (#528)

# Description  Adding code comments and user documentation related to model observability and model schema # Modifications  # Tests  # Checklist - [ ] Added PR label - [ ] Added unit test, integration, and/or e2e tests - [ ] Tested locally - [ ] Updated documentation - [ ] Update Swagger spec if the PR introduce API changes - [ ] Regenerated Golang and Python client if the PR introduces API changes # Release Notes  ```release-note ```
caraml-dev · Feb 20, 2024 · 7cc45cb · 7cc45cb
1 parent b138f39
commit 7cc45cb
Show file tree

Hide file tree

Showing 21 changed files with 695 additions and 186 deletions.
diff --git a/api/api/model_schema_api.go b/api/api/model_schema_api.go
@@ -9,10 +9,12 @@ import (
 	mErrors "github.com/caraml-dev/merlin/pkg/errors"
 )
 
+// ModelSchemaController
 type ModelSchemaController struct {
 	*AppContext
 }
 
+// GetAllSchemas list all model schemas given model ID
 func (m *ModelSchemaController) GetAllSchemas(r *http.Request, vars map[string]string, _ interface{}) *Response {
 	ctx := r.Context()
 	modelID, _ := models.ParseID(vars["model_id"])
@@ -26,6 +28,7 @@ func (m *ModelSchemaController) GetAllSchemas(r *http.Request, vars map[string]s
 	return Ok(modelSchemas)
 }
 
+// GetSchema get detail of a model schema given the schema id and model id
 func (m *ModelSchemaController) GetSchema(r *http.Request, vars map[string]string, _ interface{}) *Response {
 	ctx := r.Context()
 	modelID, _ := models.ParseID(vars["model_id"])
@@ -41,6 +44,10 @@ func (m *ModelSchemaController) GetSchema(r *http.Request, vars map[string]strin
 	return Ok(modelSchema)
 }
 
+// CreateOrUpdateSchema upsert schema
+// If ID is not defined it will create new model schema
+// If ID is defined but not exist, it will create new model schema
+// If ID is defined and exist, it will update the existing model schema associated with that ID
 func (m *ModelSchemaController) CreateOrUpdateSchema(r *http.Request, vars map[string]string, body interface{}) *Response {
 	ctx := r.Context()
 	modelID, _ := models.ParseID(vars["model_id"])
@@ -62,6 +69,7 @@ func (m *ModelSchemaController) CreateOrUpdateSchema(r *http.Request, vars map[s
 	return Ok(schema)
 }
 
+// DeleteSchema delete model schema given schema id and model id
 func (m *ModelSchemaController) DeleteSchema(r *http.Request, vars map[string]string, _ interface{}) *Response {
 	ctx := r.Context()
 	modelID, _ := models.ParseID(vars["model_id"])

diff --git a/api/models/model_schema.go b/api/models/model_schema.go
@@ -8,8 +8,7 @@ import (
 	"fmt"
 )
 
-type InferenceType string
-
+// ModelPredictionOutputClass is type for kinds of model type
 type ModelPredictionOutputClass string
 
 const (
@@ -18,6 +17,7 @@ const (
 	Ranking              ModelPredictionOutputClass = "RankingOutput"
 )
 
+// Value type is type that represent type of the value
 type ValueType string
 
 const (
@@ -27,23 +27,29 @@ const (
 	String  ValueType = "string"
 )
 
+// ModelSchema
 type ModelSchema struct {
 	ID      ID          `json:"id"`
 	Spec    *SchemaSpec `json:"spec,omitempty"`
 	ModelID ID          `json:"model_id"`
 }
 
+// SchemaSpec
 type SchemaSpec struct {
 	PredictionIDColumn    string                 `json:"prediction_id_column"`
 	ModelPredictionOutput *ModelPredictionOutput `json:"model_prediction_output"`
 	TagColumns            []string               `json:"tag_columns"`
 	FeatureTypes          map[string]ValueType   `json:"feature_types"`
 }
 
+// Value returning a value for `SchemaSpec` instance
+// This is required to be implemented when this instance is treated as JSONB column
 func (s SchemaSpec) Value() (driver.Value, error) {
 	return json.Marshal(s)
 }
 
+// Scan returning error when assigning value from db driver is failing
+// This is required to be implemented when this instance is treated as JSONB column
 func (s *SchemaSpec) Scan(value interface{}) error {
 	b, ok := value.([]byte)
 	if !ok {
@@ -65,6 +71,7 @@ func newStrictDecoder(data []byte) *json.Decoder {
 	return dec
 }
 
+// UnmarshalJSON custom deserialization of bytes into `ModelPredictionOutput`
 func (m *ModelPredictionOutput) UnmarshalJSON(data []byte) error {
 	var err error
 	outputClassStruct := struct {
@@ -99,6 +106,7 @@ func (m *ModelPredictionOutput) UnmarshalJSON(data []byte) error {
 	return nil
 }
 
+// MarshalJSON custom serialization of `ModelPredictionOutput` into json byte
 func (m ModelPredictionOutput) MarshalJSON() ([]byte, error) {
 	if m.BinaryClassificationOutput != nil {
 		return json.Marshal(&m.BinaryClassificationOutput)
@@ -115,6 +123,7 @@ func (m ModelPredictionOutput) MarshalJSON() ([]byte, error) {
 	return nil, nil
 }
 
+// BinaryClassificationOutput is specification for prediction of binary classification model
 type BinaryClassificationOutput struct {
 	ActualLabelColumn     string                     `json:"actual_label_column"`
 	NegativeClassLabel    string                     `json:"negative_class_label"`
@@ -125,13 +134,15 @@ type BinaryClassificationOutput struct {
 	OutputClass           ModelPredictionOutputClass `json:"output_class" validate:"required"`
 }
 
+// RankingOutput is specification for prediction of ranking model
 type RankingOutput struct {
 	PredictionGroudIDColumn string                     `json:"prediction_group_id_column"`
 	RankScoreColumn         string                     `json:"rank_score_column"`
 	RelevanceScoreColumn    string                     `json:"relevance_score"`
 	OutputClass             ModelPredictionOutputClass `json:"output_class" validate:"required"`
 }
 
+// Regression is specification for prediction of regression model
 type RegressionOutput struct {
 	PredictionScoreColumn string                     `json:"prediction_score_column"`
 	ActualScoreColumn     string                     `json:"actual_score_column"`

diff --git a/api/models/version.go b/api/models/version.go
@@ -23,6 +23,7 @@ import (
 	"gorm.io/gorm"
 )
 
+// Version
 type Version struct {
 	ID              ID                 `json:"id" gorm:"primary_key"`
 	ModelID         ID                 `json:"model_id" gorm:"primary_key"`
@@ -40,18 +41,21 @@ type Version struct {
 	CreatedUpdated
 }
 
+// VersionPost contains all information that is used during version creation
 type VersionPost struct {
 	Labels        KV           `json:"labels" gorm:"labels"`
 	PythonVersion string       `json:"python_version" gorm:"python_version"`
 	ModelSchema   *ModelSchema `json:"model_schema"`
 }
 
+// VersionPatch contains all information that is used during version update or patch
 type VersionPatch struct {
 	Properties      *KV              `json:"properties,omitempty"`
 	CustomPredictor *CustomPredictor `json:"custom_predictor,omitempty"`
 	ModelSchema     *ModelSchema     `json:"model_schema"`
 }
 
+// CustomPredictor contains configuration for custom model
 type CustomPredictor struct {
 	Image   string `json:"image"`
 	Command string `json:"command"`
@@ -93,6 +97,7 @@ func (kv *KV) Scan(value interface{}) error {
 	return json.Unmarshal(b, &kv)
 }
 
+// Validate do validation on the value of version
 func (v *Version) Validate() error {
 	if v.CustomPredictor != nil && v.Model.Type == ModelTypeCustom {
 		if err := v.CustomPredictor.IsValid(); err != nil {
@@ -107,6 +112,7 @@ func (v *Version) Validate() error {
 	return nil
 }
 
+// Patch version value
 func (v *Version) Patch(patch *VersionPatch) {
 	if patch.Properties != nil {
 		v.Properties = *patch.Properties
@@ -119,6 +125,7 @@ func (v *Version) Patch(patch *VersionPatch) {
 	}
 }
 
+// BeforeCreate find the latest persisted ID from version DB and increament it and assign to the receiver
 func (v *Version) BeforeCreate(db *gorm.DB) error {
 	if v.ID == 0 {
 		var maxModelVersionID int

diff --git a/api/service/model_schema_service.go b/api/service/model_schema_service.go
@@ -10,17 +10,23 @@ import (
 	"gorm.io/gorm"
 )
 
+// ModelSchemaService interface
 type ModelSchemaService interface {
+	// List all the model schemas for a model
 	List(ctx context.Context, modelID models.ID) ([]*models.ModelSchema, error)
+	// Save model schema, it can be create or update existing schema
 	Save(ctx context.Context, modelSchema *models.ModelSchema) (*models.ModelSchema, error)
+	// Delete a model schema
 	Delete(ctx context.Context, modelSchema *models.ModelSchema) error
+	// FindByID get schema given it's schema id and model id
 	FindByID(ctx context.Context, modelSchemaID models.ID, modelID models.ID) (*models.ModelSchema, error)
 }
 
 type modelSchemaService struct {
 	modelSchemaStorage storage.ModelSchemaStorage
 }
 
+// NewModelSchemaService create an instance of `ModelSchemaService`
 func NewModelSchemaService(storage storage.ModelSchemaStorage) ModelSchemaService {
 	return &modelSchemaService{
 		modelSchemaStorage: storage,

diff --git a/api/storage/model_schema_storage.go b/api/storage/model_schema_storage.go
@@ -7,17 +7,23 @@ import (
 	"gorm.io/gorm"
 )
 
+// ModelSchemaStorage interface, layer that responsibles to communicate directly with database
 type ModelSchemaStorage interface {
+	// Save create or update model schema to DB
 	Save(ctx context.Context, modelSchema *models.ModelSchema) (*models.ModelSchema, error)
+	// FindAll find all schemas givem model id from DB
 	FindAll(ctx context.Context, modelID models.ID) ([]*models.ModelSchema, error)
+	// FindByID find schema given it's id from DB
 	FindByID(ctx context.Context, modelSchemaID models.ID, modelID models.ID) (*models.ModelSchema, error)
+	// Delete delete schema give it's id from DB
 	Delete(ctx context.Context, modelSchema *models.ModelSchema) error
 }
 
 type modelSchemaStorage struct {
 	db *gorm.DB
 }
 
+// NewModelSchemaStorage create new instance of ModelSchemaStorage
 func NewModelSchemaStorage(db *gorm.DB) ModelSchemaStorage {
 	return &modelSchemaStorage{db: db}
 }

diff --git a/docs/diagrams/model_observability.drawio.svg b/docs/diagrams/model_observability.drawio.svg
diff --git a/docs/user/generated/09_troubleshooting_deployment_errors.md b/docs/user/generated/09_troubleshooting_deployment_errors.md
@@ -10,9 +10,9 @@ This page discusses scenarios you may encounter during model deployment that wil
 
 ## Troubleshooting views
 
-Merlin provides the following views on the UI to troubeshoot a deployment:
+Merlin provides the following views on the UI to troubleshoot a deployment:
 
-- **Logs** - the live console output when the iamge is building or the deployment is running
+- **Logs** - the live console output when the image is building or the deployment is running
 - **History** - the list of deployment history status and message
 
 You can navigate to these views from the Model Version page by clicking on the Logs tab or History tab.
@@ -25,7 +25,7 @@ You can navigate to these views from the Model Version page by clicking on the L
 
 The "OOMKilled" error occurs when a container is terminated due to out-of-memory conditions. This typically happens when a container exceeds its allocated memory limit and the system is unable to provide additional memory. When this occurs, the container will be killed with exit code 137 to free up resources.
 
-This error can effect both image building and deployment steps. To resolve the OOMKilled error, follow these steps:
+This error can affect both image building and deployment steps. To resolve the OOMKilled error, follow these steps:
 
 1. Check which components that got OOMKilled
 2. Check affected component memory limits
@@ -42,7 +42,7 @@ Liveness and readiness probes are essential for ensuring the health and availabi
 Troubleshooting steps:
 
 1. For standard model type, check pre-trained model size
-2. For pyfunc model type, check how model got initialized
+2. For pyfunc model type, check how model was initialized
 3. Inspect model logs
 4. Monitor resource utilization
 

diff --git a/docs/user/generated/10_model_schema.md b/docs/user/generated/10_model_schema.md
@@ -0,0 +1,90 @@
+<!-- page-title: Model Schema -->
+
+# Model Schema
+
+Model schema is a specification of input and output of a model, such as what are the features columns, prediction columns and also ground truth columns. Following are the fields in model schema:
+
+| Field | Type | Description | Mandatory |
+|-------|------|-------------|-----------|
+| `id` | int | Unique identifier for each model schema | Not mandatory, if ID is not specified it will create new model schema otherwise it will update the model schema with corresponding ID |
+| `model_id`| int | Model ID that correlate with the schema | Not mandatory, if not specified the SDK will assign it with the model that user set |
+| `spec` | InferenceSchema | Detail specification for model schema | True |
+
+Detail specification is defined by using `InferenceSchema` class, following are the fields:
+| Field | Type | Description | Mandatory |
+|-------|------|-------------|-----------|
+| `feature_types` | Dict[str, ValueType] | Mapping between feature name with the type of the feature | True |
+| `model_prediction_output` | PredictionOutput | Prediction specification that differ between model types, e.g BinaryClassificationOutput, RegressionOutput, RankingOutput | True |
+| `prediction_id_column` | str | The column name that contains prediction id value | True |
+| `tag_columns` | Optional[List[str]] | List of column names that contains additional information about prediction, you can treat it as metadata | False |
+
+From above we can see `model_prediction_output` field that has type `PredictionOutput`, this field is a specification of prediction that is generated by the model depending on it's model type. Currently we support 3 model types in the schema:
+* Binary Classification
+* Regression
+* Ranking
+
+Each model type has it's own model prediction output specification.
+
+## Binary Classification
+Model prediction output specification for Binary Classification type is `BinaryClassificationOutput` that has following fields:
+
+| Field | Type | Description | Mandatory |
+|-------|------|-------------|-----------|
+| `prediction_score_column` | str | Column that contains prediction score value of a model. Prediction score must be between 0.0 and 1.0 | True |
+| `actual_label_column` | str | Name of the column containing the actual class | False, because not all model has the ground truth |
+| `positive_class_label` | str | Label for positive class | True |
+| `negative_class_label` | str | Label for negative class | True |
+| `score_threshold` | float | Score threshold for prediction to be considered as positive class | False, if not specified it will use 0.5 as default |
+
+## Regression
+Model prediction output specification for Regression type is `RegressionOutput` that has following fields:
+
+| Field | Type | Description | Mandatory |
+|-------|------|-------------|-----------|
+| `prediction_score_column` | str | Column that contains prediction score value of a model | True |
+| `actual_score_column` | str | Name of the column containing the actual score | False, because not all model has the ground truth |
+
+
+## Ranking
+Model prediction output specification for Ranking type is `RankingOutput` that has following fields:
+
+| Field | Type | Description | Mandatory |
+|-------|------|-------------|-----------|
+| `rank_score_column` | str | Name of the column containing the ranking score of the prediction | True |
+| `prediction_group_id_column` | str | Name of the column containing the prediction group id | True |
+| `relevance_score_column` | str | Name of the column containing the relevance score of the prediction | True |
+
+## Define model schema
+From the specification above, users can create the schema for their model. Suppose that users have binary classification model, that has 4 features
+* featureA that has float type
+* featureB that has int type
+* featureC that has string type
+* featureD that has float type
+
+With positive class `complete` and negative class `non_complete` and the threshold for positive class is 0.75. Actual label is stored under column `target`, `prediction_score` under column `score` `prediction_id` under column `prediction_id`. From that specification, users can define the model schema and put it alongside version creation. Below is the example snipped code
+
+```python
+from merlin.model_schema import ModelSchema
+from merlin.observability.inference import InferenceSchema, ValueType, BinaryClassificationOutput
+ model_schema = ModelSchema(spec=InferenceSchema(
+        feature_types={
+            "featureA": ValueType.FLOAT64,
+            "featureB": ValueType.INT64,
+            "featureC": ValueType.STRING,
+            "featureD": ValueType.BOOLEAN
+        },
+        prediction_id_column="prediction_id",
+        model_prediction_output=BinaryClassificationOutput(
+            prediction_score_column="score",
+            actual_label_column="target",
+            positive_class_label="complete",
+            negative_class_label="non_complete",
+            score_threshold=0.75
+        )
+    ))
+with merlin.new_model_version(model_schema=model_schema) as v:
+    ....
+
+```
+
+The above snipped code will define model schema and attach it to certain model version, the reason is the schema for each version is possible to differ.