Skip to content

Commit

Permalink
adv query
Browse files Browse the repository at this point in the history
  • Loading branch information
DataKnox committed Aug 29, 2022
1 parent d54c7a9 commit c6f2a82
Show file tree
Hide file tree
Showing 5 changed files with 577 additions and 0 deletions.
Binary file added SQL/DP500/Adv Data Sources/demo.pbix
Binary file not shown.
316 changes: 316 additions & 0 deletions SQL/DP500/Adv Data Sources/deploy_ml_model.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"gather": {
"logged": 1661782492602
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 442 entries, 0 to 441\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 AGE 442 non-null int64 \n",
" 1 SEX 442 non-null int64 \n",
" 2 BMI 442 non-null float64\n",
" 3 BP 442 non-null float64\n",
" 4 S1 442 non-null int64 \n",
" 5 S2 442 non-null float64\n",
" 6 S3 442 non-null float64\n",
" 7 S4 442 non-null float64\n",
" 8 S5 442 non-null float64\n",
" 9 S6 442 non-null int64 \n",
"dtypes: float64(6), int64(4)\n",
"memory usage: 34.7 KB\n"
]
}
],
"source": [
"from azureml.opendatasets import Diabetes\n",
"\n",
"diabetes = Diabetes.get_tabular_dataset()\n",
"X = diabetes.drop_columns(\"Y\")\n",
"y = diabetes.keep_columns(\"Y\")\n",
"X_df = X.to_pandas_dataframe()\n",
"y_df = y.to_pandas_dataframe()\n",
"X_df.info()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"gather": {
"logged": 1661782499120
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [
{
"data": {
"text/plain": [
"['sklearn_regression_model.pkl']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import joblib\n",
"from sklearn.linear_model import Ridge\n",
"\n",
"model = Ridge().fit(X_df,y_df)\n",
"joblib.dump(model, 'sklearn_regression_model.pkl')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"gather": {
"logged": 1661782505031
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Registering model my-sklearn-model\n",
"Name: my-sklearn-model\n",
"Version: 1\n"
]
}
],
"source": [
"import sklearn\n",
"\n",
"from azureml.core import Workspace\n",
"from azureml.core import Model\n",
"from azureml.core.resource_configuration import ResourceConfiguration\n",
"\n",
"ws = Workspace.from_config()\n",
"\n",
"model = Model.register(workspace=ws,\n",
" model_name='my-sklearn-model', # Name of the registered model in your workspace.\n",
" model_path='./sklearn_regression_model.pkl', # Local file to upload and register as a model.\n",
" model_framework=Model.Framework.SCIKITLEARN, # Framework used to create the model.\n",
" model_framework_version=sklearn.__version__, # Version of scikit-learn used to create the model.\n",
" sample_input_dataset=X,\n",
" sample_output_dataset=y,\n",
" resource_configuration=ResourceConfiguration(cpu=2, memory_in_gb=4),\n",
" description='Ridge regression model to predict diabetes progression.',\n",
" tags={'area': 'diabetes', 'type': 'regression'})\n",
"\n",
"print('Name:', model.name)\n",
"print('Version:', model.version)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing score.py\n"
]
}
],
"source": [
"%%writefile score.py\n",
"\n",
"import json\n",
"import pickle\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import joblib\n",
"from azureml.core.model import Model\n",
"\n",
"from inference_schema.schema_decorators import input_schema, output_schema\n",
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
"from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType\n",
"\n",
"\n",
"def init():\n",
" global model\n",
" # Replace filename if needed.\n",
" path = os.getenv('AZUREML_MODEL_DIR') \n",
" model_path = os.path.join(path, 'sklearn_regression_model.pkl')\n",
" # Deserialize the model file back into a sklearn model.\n",
" model = joblib.load(model_path)\n",
"\n",
"\n",
"input_sample = pd.DataFrame(data=[{\n",
" \"AGE\": 5,\n",
" \"SEX\": 2,\n",
" \"BMI\": 3.1,\n",
" \"BP\": 3.1,\n",
" \"S1\": 3.1,\n",
" \"S2\": 3.1,\n",
" \"S3\": 3.1,\n",
" \"S4\": 3.1,\n",
" \"S5\": 3.1,\n",
" \"S6\": 3.1\n",
"}])\n",
"\n",
"# This is an integer type sample. Use the data type that reflects the expected result.\n",
"output_sample = np.array([0])\n",
"\n",
"# To indicate that we support a variable length of data input,\n",
"# set enforce_shape=False\n",
"@input_schema('data', PandasParameterType(input_sample))\n",
"@output_schema(NumpyParameterType(output_sample))\n",
"def run(data):\n",
" try:\n",
" print(\"input_data....\")\n",
" print(data.columns)\n",
" print(type(data))\n",
" result = model.predict(data)\n",
" print(\"result.....\")\n",
" print(result)\n",
" # You can return any data type, as long as it can be serialized by JSON.\n",
" return result.tolist()\n",
" except Exception as e:\n",
" error = str(e)\n",
" return error"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"gather": {
"logged": 1661782512382
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"from azureml.core import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"environment = Environment('my-sklearn-environment')\n",
"environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[\n",
" 'azureml-defaults',\n",
" 'inference-schema[numpy-support]',\n",
" 'joblib',\n",
" 'numpy',\n",
" 'pandas',\n",
" 'Jinja2<3.1',\n",
" 'scikit-learn=={}'.format(sklearn.__version__)\n",
"])\n",
"\n",
"inference_config = InferenceConfig(entry_script='./score.py',environment=environment)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.\n",
"Running\n",
"2022-08-29 14:15:19+00:00 Creating Container Registry if not exists."
]
}
],
"source": [
"service_name = 'my-diabetes-model'\n",
"\n",
"service = Model.deploy(ws, service_name, [model], inference_config, overwrite=True)\n",
"service.wait_for_deployment(show_output=True)"
]
}
],
"metadata": {
"kernel_info": {
"name": "python38-azureml"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"nteract": {
"version": "[email protected]"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added SQL/DP500/Adv Data Sources/ml_rpt.pbix
Binary file not shown.
Loading

0 comments on commit c6f2a82

Please sign in to comment.