diff --git a/1_feature_store.ipynb b/1_feature_store.ipynb
index 03f35bc..37b26c0 100644
--- a/1_feature_store.ipynb
+++ b/1_feature_store.ipynb
@@ -46,8 +46,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 24,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"import sagemaker\n",
@@ -71,16 +73,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 25,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ap-southeast-2\n"
+ ]
+ }
+ ],
"source": [
"role = sagemaker.get_execution_role()\n",
"sagemaker_session = sagemaker.Session()\n",
"default_bucket = sagemaker_session.default_bucket()\n",
"region = sagemaker_session.boto_region_name\n",
"s3_client = boto3.client('s3', region_name=region)\n",
- "\n",
+ "print(region)\n",
"# ParameterStore is a custom utility to save local variable values\n",
"# for use across all notebooks\n",
"ps = ParameterStore(verbose=False)\n",
@@ -89,8 +101,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 26,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"prefix = 'recsys-feature-store'\n",
@@ -114,7 +128,9 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"print(f'Using SageMaker version: {sagemaker.__version__}')\n",
@@ -124,9 +140,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 28,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Feature groups names:\n",
+ "\n",
+ "recsys-customers-fg-05-29-00-10\n",
+ "recsys-products-fg-05-29-00-10\n",
+ "recsys-orders-fg-05-29-00-10\n",
+ "recsys-click-stream-historical-fg-05-29-00-10\n",
+ "recsys-click-stream-fg-05-29-00-10\n"
+ ]
+ }
+ ],
"source": [
"print('Feature groups names:\\n')\n",
"print(customers_feature_group_name)\n",
@@ -152,9 +184,111 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 29,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " name | \n",
+ " state | \n",
+ " age | \n",
+ " is_married | \n",
+ " customer_health_index | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C1 | \n",
+ " justin gutierrez | \n",
+ " alaska | \n",
+ " 52 | \n",
+ " 1 | \n",
+ " 0.590238 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C2 | \n",
+ " karen cross | \n",
+ " idaho | \n",
+ " 29 | \n",
+ " 1 | \n",
+ " 0.622201 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C3 | \n",
+ " amy king | \n",
+ " oklahoma | \n",
+ " 70 | \n",
+ " 1 | \n",
+ " 0.225476 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C4 | \n",
+ " nicole hartman | \n",
+ " missouri | \n",
+ " 52 | \n",
+ " 1 | \n",
+ " 0.975817 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C5 | \n",
+ " jessica powers | \n",
+ " minnesota | \n",
+ " 31 | \n",
+ " 1 | \n",
+ " 0.886133 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id name state age is_married \\\n",
+ "0 C1 justin gutierrez alaska 52 1 \n",
+ "1 C2 karen cross idaho 29 1 \n",
+ "2 C3 amy king oklahoma 70 1 \n",
+ "3 C4 nicole hartman missouri 52 1 \n",
+ "4 C5 jessica powers minnesota 31 1 \n",
+ "\n",
+ " customer_health_index \n",
+ "0 0.590238 \n",
+ "1 0.622201 \n",
+ "2 0.225476 \n",
+ "3 0.975817 \n",
+ "4 0.886133 "
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df_customers = pd.read_csv('data/customers.csv')\n",
"df_customers.head()"
@@ -169,9 +303,99 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 30,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_name | \n",
+ " product_category | \n",
+ " product_id | \n",
+ " product_health_index | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " chocolate sandwich cookies | \n",
+ " cookies_cakes | \n",
+ " P1 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nutter butter cookie bites go-pak | \n",
+ " cookies_cakes | \n",
+ " P25 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " danish butter cookies | \n",
+ " cookies_cakes | \n",
+ " P34 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " gluten free all natural chocolate chip cookies | \n",
+ " cookies_cakes | \n",
+ " P55 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " mini nilla wafers munch pack | \n",
+ " cookies_cakes | \n",
+ " P99 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " product_name product_category product_id \\\n",
+ "0 chocolate sandwich cookies cookies_cakes P1 \n",
+ "1 nutter butter cookie bites go-pak cookies_cakes P25 \n",
+ "2 danish butter cookies cookies_cakes P34 \n",
+ "3 gluten free all natural chocolate chip cookies cookies_cakes P55 \n",
+ "4 mini nilla wafers munch pack cookies_cakes P99 \n",
+ "\n",
+ " product_health_index \n",
+ "0 0.1 \n",
+ "1 0.1 \n",
+ "2 0.1 \n",
+ "3 0.1 \n",
+ "4 0.1 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df_products = pd.read_csv('data/products.csv')\n",
"df_products.head()"
@@ -186,9 +410,86 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 31,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " product_id | \n",
+ " purchase_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C1 | \n",
+ " P10852 | \n",
+ " 87.71 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C1 | \n",
+ " P10940 | \n",
+ " 101.71 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C1 | \n",
+ " P13818 | \n",
+ " 42.11 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C1 | \n",
+ " P2310 | \n",
+ " 55.37 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C1 | \n",
+ " P393 | \n",
+ " 55.16 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id product_id purchase_amount\n",
+ "0 C1 P10852 87.71\n",
+ "1 C1 P10940 101.71\n",
+ "2 C1 P13818 42.11\n",
+ "3 C1 P2310 55.37\n",
+ "4 C1 P393 55.16"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df_orders = pd.read_csv('data/orders.csv')\n",
"df_orders.head()"
@@ -203,9 +504,98 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 32,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " product_id | \n",
+ " bought | \n",
+ " healthy_activity_last_2m | \n",
+ " rating | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C1 | \n",
+ " P10852 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 3.048429 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C3806 | \n",
+ " P10852 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1.674935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C5257 | \n",
+ " P10852 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2.691236 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C8220 | \n",
+ " P10852 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1.773447 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C1 | \n",
+ " P10852 | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 3.048429 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id product_id bought healthy_activity_last_2m rating\n",
+ "0 C1 P10852 1 1 3.048429\n",
+ "1 C3806 P10852 1 1 1.674935\n",
+ "2 C5257 P10852 1 0 2.691236\n",
+ "3 C8220 P10852 1 1 1.773447\n",
+ "4 C1 P10852 0 9 3.048429"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df_click_stream_historical = pd.read_csv('data/click_stream_historical.csv')\n",
"df_click_stream_historical.head()"
@@ -220,9 +610,72 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 33,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " sum_activity_weight_last_2m | \n",
+ " avg_product_health_index_last_2m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C09234 | \n",
+ " 8 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " D19283 | \n",
+ " 3 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C1234 | \n",
+ " 9 | \n",
+ " 0.8 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id sum_activity_weight_last_2m avg_product_health_index_last_2m\n",
+ "0 C09234 8 0.2\n",
+ "1 D19283 3 0.1\n",
+ "2 C1234 9 0.8"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Read a sample in order to have a schema for Feature Group creation\n",
"df_click_stream = pd.read_csv('data/click_stream.csv')\n",
@@ -247,23 +700,244 @@
"For more information, see [Feature Store Concepts](https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store-getting-started.html#feature-store-concepts) and [these docs](https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store-create-feature-group.html)."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# One FeatureGroup handle per feature group name, all bound to the same SageMaker session.\n",
+ "# The targets are unpacked in the same order as the generator yields the handles.\n",
+ "(\n",
+ "    customers_feature_group,\n",
+ "    orders_feature_group,\n",
+ "    products_feature_group,\n",
+ "    click_stream_historical_feature_group,\n",
+ "    click_stream_feature_group,\n",
+ ") = (\n",
+ "    FeatureGroup(name=group_name, sagemaker_session=sagemaker_session)\n",
+ "    for group_name in (\n",
+ "        customers_feature_group_name, orders_feature_group_name, products_feature_group_name,\n",
+ "        click_stream_historical_feature_group_name, click_stream_feature_group_name,\n",
+ "    )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " sum_activity_weight_last_2m | \n",
+ " avg_product_health_index_last_2m | \n",
+ " event_time | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C09234 | \n",
+ " 8 | \n",
+ " 0.2 | \n",
+ " 1.716941e+09 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " D19283 | \n",
+ " 3 | \n",
+ " 0.1 | \n",
+ " 1.716941e+09 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C1234 | \n",
+ " 9 | \n",
+ " 0.8 | \n",
+ " 1.716941e+09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id sum_activity_weight_last_2m avg_product_health_index_last_2m \\\n",
+ "0 C09234 8 0.2 \n",
+ "1 D19283 3 0.1 \n",
+ "2 C1234 9 0.8 \n",
+ "\n",
+ " event_time \n",
+ "0 1.716941e+09 \n",
+ "1 1.716941e+09 \n",
+ "2 1.716941e+09 "
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Event Time\n",
+ "event_time_feature_name = \"event_time\"\n",
+ "current_time_sec = int(round(time.time()))\n",
+ "\n",
+ "# Give every frame the same event time: a float64 pd.Series holding len(frame)\n",
+ "# copies of current_time_sec, assigned as the event_time column.\n",
+ "for _frame in (df_customers, df_orders, df_products,\n",
+ "               df_click_stream_historical, df_click_stream):\n",
+ "    _frame[event_time_feature_name] = pd.Series([current_time_sec] * len(_frame), dtype=\"float64\")\n",
+ "\n",
+ "df_click_stream.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[FeatureDefinition(feature_name='customer_id', feature_type=, collection_type=None),\n",
+ " FeatureDefinition(feature_name='sum_activity_weight_last_2m', feature_type=, collection_type=None),\n",
+ " FeatureDefinition(feature_name='avg_product_health_index_last_2m', feature_type=, collection_type=None),\n",
+ " FeatureDefinition(feature_name='event_time', feature_type=, collection_type=None)]"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load Feature Definitions (the last call stays a bare expression so the cell still displays its result)\n",
+ "for _group, _frame in ((customers_feature_group, df_customers),\n",
+ "                       (orders_feature_group, df_orders), (products_feature_group, df_products),\n",
+ "                       (click_stream_historical_feature_group, df_click_stream_historical)):\n",
+ "    _group.load_feature_definitions(data_frame=_frame)\n",
+ "click_stream_feature_group.load_feature_definitions(data_frame=df_click_stream)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "customers_feature_group = create_feature_group(df_customers, customers_feature_group_name,\n",
- " 'customer_id', prefix, sagemaker_session)\n",
- "products_feature_group = create_feature_group(df_products, products_feature_group_name, 'product_id',\n",
- " prefix, sagemaker_session)\n",
- "orders_feature_group = create_feature_group(df_orders, orders_feature_group_name, 'order_id', prefix,\n",
- " sagemaker_session)\n",
- "click_stream_historical_feature_group = create_feature_group(df_click_stream_historical,\n",
- " click_stream_historical_feature_group_name,\n",
- " 'click_stream_id', prefix, sagemaker_session)\n",
- "click_stream_feature_group = create_feature_group(df_click_stream, click_stream_feature_group_name, 'customer_id',\n",
- " prefix, sagemaker_session)"
+ "# Create Feature Groups\n",
+ "# NOTE(review): customer_id is also the record identifier for the orders and\n",
+ "# click-stream-historical groups, although their sample rows repeat customer_id;\n",
+ "# confirm that is intended before relying on per-row lookups.\n",
+ "record_identifier_feature_name = \"customer_id\"\n",
+ "\n",
+ "# Keyword arguments shared by every create() call in this cell\n",
+ "# (same S3 location, event-time column, IAM role and online-store setting for all five groups)\n",
+ "_common_create_args = dict(\n",
+ "    s3_uri=f\"s3://{default_bucket}/{prefix}\",\n",
+ "    event_time_feature_name=event_time_feature_name,\n",
+ "    role_arn=role,\n",
+ "    enable_online_store=True,\n",
+ ")\n",
+ "\n",
+ "# Feature groups keyed by customer_id\n",
+ "customers_feature_group.create(\n",
+ "    record_identifier_name=record_identifier_feature_name,\n",
+ "    **_common_create_args,\n",
+ ")\n",
+ "\n",
+ "orders_feature_group.create(\n",
+ "    record_identifier_name=record_identifier_feature_name,\n",
+ "    **_common_create_args,\n",
+ ")\n",
+ "\n",
+ "click_stream_historical_feature_group.create(\n",
+ "    record_identifier_name=record_identifier_feature_name,\n",
+ "    **_common_create_args,\n",
+ ")\n",
+ "\n",
+ "click_stream_feature_group.create(\n",
+ "    record_identifier_name=record_identifier_feature_name,\n",
+ "    **_common_create_args,\n",
+ ")\n",
+ "\n",
+ "# The products group is keyed by product_id instead\n",
+ "products_record_identifier_feature_name = \"product_id\"\n",
+ "\n",
+ "products_feature_group.create(\n",
+ "    record_identifier_name=products_record_identifier_feature_name,\n",
+ "    **_common_create_args,\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Waiting for Feature Group to be Created\n",
+ "Waiting for Feature Group to be Created\n",
+ "FeatureGroup recsys-customers-fg-05-29-00-10 successfully created.\n",
+ "Waiting for Feature Group to be Created\n",
+ "FeatureGroup recsys-orders-fg-05-29-00-10 successfully created.\n",
+ "Waiting for Feature Group to be Created\n",
+ "FeatureGroup recsys-products-fg-05-29-00-10 successfully created.\n",
+ "FeatureGroup recsys-click-stream-historical-fg-05-29-00-10 successfully created.\n",
+ "FeatureGroup recsys-click-stream-fg-05-29-00-10 successfully created.\n"
+ ]
+ }
+ ],
+ "source": [
+ "def check_feature_group_status(feature_group):\n",
+ "    \"\"\"Poll every 5s while status is 'Creating'; raise RuntimeError unless it ends as 'Created'.\"\"\"\n",
+ "    while (status := feature_group.describe().get(\"FeatureGroupStatus\")) == \"Creating\":\n",
+ "        print(\"Waiting for Feature Group to be Created\")\n",
+ "        time.sleep(5)\n",
+ "    if status != \"Created\":  # e.g. 'CreateFailed' - do not report success\n",
+ "        raise RuntimeError(f\"FeatureGroup {feature_group.name} was not created (status: {status})\")\n",
+ "    print(f\"FeatureGroup {feature_group.name} successfully created.\")\n",
+ "\n",
+ "\n",
+ "# Run the status check on each of the five Feature Groups in turn:\n",
+ "# customers, orders, products, click-stream-historical, click-stream\n",
+ "for _group in (customers_feature_group, orders_feature_group, products_feature_group,\n",
+ "               click_stream_historical_feature_group, click_stream_feature_group):\n",
+ "    check_feature_group_status(_group)"
]
},
{
@@ -277,22 +951,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 39,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
- "customers_table = get_feature_group_table_name(customers_feature_group)\n",
- "products_table = get_feature_group_table_name(products_feature_group)\n",
- "orders_table = get_feature_group_table_name(orders_feature_group)\n",
- "click_stream_historical_table = get_feature_group_table_name(click_stream_historical_feature_group)\n",
- "click_stream_table = get_feature_group_table_name(click_stream_feature_group)\n",
+ "# Store table names\n",
+ "#   1. build one athena_query() helper per Feature Group\n",
+ "#   2. read the table name off each helper\n",
+ "\n",
+ "customers_query = customers_feature_group.athena_query()\n",
+ "products_query = products_feature_group.athena_query()\n",
+ "orders_query = orders_feature_group.athena_query()\n",
+ "click_stream_historical_query = click_stream_historical_feature_group.athena_query()\n",
+ "click_stream_query = click_stream_feature_group.athena_query()\n",
+ "\n",
+ "customers_table = customers_query.table_name\n",
+ "products_table = products_query.table_name\n",
+ "orders_table = orders_query.table_name\n",
+ "click_stream_historical_table = click_stream_historical_query.table_name\n",
+ "click_stream_table = click_stream_query.table_name\n",
"\n",
"# Store table names locally to be used in other notebooks\n",
- "ps.add({'customers_table': customers_table,\n",
- " 'products_table': products_table,\n",
- " 'orders_table': orders_table,\n",
- " 'click_stream_historical_table': click_stream_historical_table,\n",
- " 'click_stream_table': click_stream_table})"
+ "ps.add(dict(\n",
+ "    customers_table=customers_table,\n",
+ "    products_table=products_table,\n",
+ "    orders_table=orders_table,\n",
+ "    click_stream_historical_table=click_stream_historical_table,\n",
+ "    click_stream_table=click_stream_table,\n",
+ "))"
]
},
{
@@ -311,20 +999,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 40,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "IngestionManagerPandas(feature_group_name='recsys-click-stream-historical-fg-05-29-00-10', feature_definitions={'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'product_id': {'FeatureName': 'product_id', 'FeatureType': 'String'}, 'bought': {'FeatureName': 'bought', 'FeatureType': 'Integral'}, 'healthy_activity_last_2m': {'FeatureName': 'healthy_activity_last_2m', 'FeatureType': 'Integral'}, 'rating': {'FeatureName': 'rating', 'FeatureType': 'Fractional'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=, sagemaker_session=, max_workers=3, max_processes=1, profile_name=None, _async_result=, _processing_pool=, _failed_indices=[])"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "ingest_data_into_feature_group(df_customers, customers_feature_group)\n",
+ "# Ingest data into the feature groups; wait=True blocks until each ingestion finishes so failed rows surface here\n",
+ "customers_feature_group.ingest(data_frame=df_customers, max_workers=3, wait=True)\n",
"customers_count = df_customers.shape[0]\n",
"\n",
- "ingest_data_into_feature_group(df_products, products_feature_group)\n",
+ "products_feature_group.ingest(data_frame=df_products, max_workers=3, wait=True)\n",
"products_count = df_products.shape[0]\n",
"\n",
- "ingest_data_into_feature_group(df_orders, orders_feature_group)\n",
+ "orders_feature_group.ingest(data_frame=df_orders, max_workers=3, wait=True)\n",
"orders_count = df_orders.shape[0]\n",
"\n",
- "ingest_data_into_feature_group(df_click_stream_historical, click_stream_historical_feature_group)\n",
+ "click_stream_historical_feature_group.ingest(data_frame=df_click_stream_historical, max_workers=3, wait=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
"click_stream_historical_count = df_click_stream_historical.shape[0]\n",
"\n",
"# Add Feature Group counts for later use\n",
@@ -351,18 +1063,6 @@
""
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "wait_for_offline_data(customers_feature_group_name, df_customers, sagemaker_session)\n",
- "wait_for_offline_data(products_feature_group_name, df_products, sagemaker_session)\n",
- "wait_for_offline_data(orders_feature_group_name, df_orders, sagemaker_session)\n",
- "wait_for_offline_data(click_stream_historical_feature_group_name, df_click_stream_historical, sagemaker_session)"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -372,9 +1072,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 43,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Offline Feature Store S3 Link"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"account_id = default_bucket.split('-')[-1]\n",
"offline_store_url = f'https://s3.console.aws.amazon.com/s3/buckets/{default_bucket}?region={region}&prefix={prefix}/{account_id}/sagemaker/{region}/offline-store/&showversions=false'\n",
@@ -383,9 +1098,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 44,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "date and time: 29/05/2024 00:34:44\n"
+ ]
+ }
+ ],
"source": [
"# Save all our local params\n",
"ps.store()"
@@ -400,14 +1125,619 @@
}
],
"metadata": {
- "instance_type": "ml.m5.4xlarge",
+ "availableInstances": [
+ {
+ "_defaultOrder": 0,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.t3.medium",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 1,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.t3.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 2,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.t3.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 3,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.t3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 4,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 5,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 6,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 7,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 8,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 9,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 10,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 11,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 12,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5d.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 13,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5d.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 14,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5d.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 15,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5d.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 16,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5d.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 17,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5d.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 18,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5d.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 19,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 20,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": true,
+ "memoryGiB": 0,
+ "name": "ml.geospatial.interactive",
+ "supportedImageNames": [
+ "sagemaker-geospatial-v1-0"
+ ],
+ "vcpuNum": 0
+ },
+ {
+ "_defaultOrder": 21,
+ "_isFastLaunch": true,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.c5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 22,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.c5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 23,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.c5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 24,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.c5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 25,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 72,
+ "name": "ml.c5.9xlarge",
+ "vcpuNum": 36
+ },
+ {
+ "_defaultOrder": 26,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 96,
+ "name": "ml.c5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 27,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 144,
+ "name": "ml.c5.18xlarge",
+ "vcpuNum": 72
+ },
+ {
+ "_defaultOrder": 28,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.c5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 29,
+ "_isFastLaunch": true,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g4dn.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 30,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g4dn.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 31,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g4dn.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 32,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g4dn.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 33,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g4dn.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 34,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g4dn.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 35,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 61,
+ "name": "ml.p3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 36,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 244,
+ "name": "ml.p3.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 37,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 488,
+ "name": "ml.p3.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 38,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.p3dn.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 39,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.r5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 40,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.r5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 41,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.r5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 42,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.r5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 43,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.r5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 44,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.r5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 45,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.r5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 46,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.r5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 47,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 48,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 49,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 50,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 51,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 52,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 53,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.g5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 54,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.g5.48xlarge",
+ "vcpuNum": 192
+ },
+ {
+ "_defaultOrder": 55,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 56,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4de.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 57,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.trn1.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 58,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1.32xlarge",
+ "vcpuNum": 128
+ },
+ {
+ "_defaultOrder": 59,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1n.32xlarge",
+ "vcpuNum": 128
+ }
+ ],
+ "instance_type": "ml.t3.medium",
"interpreter": {
"hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322"
},
"kernelspec": {
- "display_name": "Python 3 (Data Science)",
+ "display_name": "Python 3 (Data Science 3.0)",
"language": "python",
- "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
+ "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1"
},
"language_info": {
"codemirror_mode": {
@@ -419,7 +1749,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.10"
+ "version": "3.10.6"
}
},
"nbformat": 4,
diff --git a/2_recommendation_engine_models.ipynb b/2_recommendation_engine_models.ipynb
index 15823a9..7f416ec 100644
--- a/2_recommendation_engine_models.ipynb
+++ b/2_recommendation_engine_models.ipynb
@@ -37,8 +37,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 6,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"import sagemaker\n",
@@ -72,8 +74,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 7,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"role = sagemaker.get_execution_role()\n",
@@ -89,8 +93,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 8,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# CF model variables\n",
@@ -122,8 +128,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 9,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"parameters = ps.read()\n",
@@ -134,11 +142,58 @@
"click_stream_historical_feature_group_name = parameters['click_stream_historical_feature_group_name']\n",
"click_stream_feature_group_name = parameters['click_stream_feature_group_name']\n",
"\n",
- "customers_table = parameters['customers_table']\n",
- "products_table = parameters['products_table']\n",
- "orders_table = parameters['orders_table']\n",
- "click_stream_historical_table = parameters['click_stream_historical_table']\n",
- "click_stream_table = parameters['click_stream_table']"
+ "customers_feature_group = FeatureGroup(\n",
+ " name=customers_feature_group_name, sagemaker_session=sagemaker_session\n",
+ ")\n",
+ "products_feature_group = FeatureGroup(\n",
+ " name=products_feature_group_name, sagemaker_session=sagemaker_session\n",
+ ")\n",
+ "orders_feature_group = FeatureGroup(\n",
+ " name=orders_feature_group_name, sagemaker_session=sagemaker_session\n",
+ ")\n",
+ "click_stream_historical_feature_group = FeatureGroup(\n",
+ " name=click_stream_historical_feature_group_name, sagemaker_session=sagemaker_session\n",
+ ")\n",
+ "click_stream_feature_group = FeatureGroup(\n",
+ " name=click_stream_feature_group_name, sagemaker_session=sagemaker_session\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "FeatureGroup recsys-customers-fg-05-29-00-10 successfully created.\n",
+ "FeatureGroup recsys-products-fg-05-29-00-10 successfully created.\n",
+ "FeatureGroup recsys-orders-fg-05-29-00-10 successfully created.\n",
+ "FeatureGroup recsys-click-stream-historical-fg-05-29-00-10 successfully created.\n",
+ "FeatureGroup recsys-click-stream-fg-05-29-00-10 successfully created.\n"
+ ]
+ }
+ ],
+ "source": [
+ "def check_feature_group_status(feature_group):\n",
+ "    \"\"\"Poll `feature_group` every 5 s while it is 'Creating'; raise RuntimeError unless it ends up 'Created'.\"\"\"\n",
+ "    status = feature_group.describe().get(\"FeatureGroupStatus\")\n",
+ "    while status == \"Creating\":\n",
+ "        print(\"Waiting for Feature Group to be Created\")\n",
+ "        time.sleep(5)\n",
+ "        status = feature_group.describe().get(\"FeatureGroupStatus\")\n",
+ "    if status != \"Created\":  # e.g. 'CreateFailed': never report a failed group as created\n",
+ "        raise RuntimeError(f\"FeatureGroup {feature_group.name} is in status {status!r}, expected 'Created'\")\n",
+ "    print(f\"FeatureGroup {feature_group.name} successfully created.\")\n",
+ "\n",
+ "\n",
+ "for _fg in (customers_feature_group, products_feature_group, orders_feature_group,\n",
+ "            click_stream_historical_feature_group, click_stream_feature_group):\n",
+ "    check_feature_group_status(_fg)"
]
},
{
@@ -159,10 +214,128 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 11,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " product_id | \n",
+ " rating | \n",
+ " state | \n",
+ " age | \n",
+ " is_married | \n",
+ " product_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C2786 | \n",
+ " P1206 | \n",
+ " 1.199858 | \n",
+ " new mexico | \n",
+ " 75 | \n",
+ " 0 | \n",
+ " creamy caramel filled hard candies | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C866 | \n",
+ " P1206 | \n",
+ " 1.424285 | \n",
+ " delaware | \n",
+ " 35 | \n",
+ " 1 | \n",
+ " creamy caramel filled hard candies | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C2786 | \n",
+ " P14984 | \n",
+ " 2.513348 | \n",
+ " new mexico | \n",
+ " 75 | \n",
+ " 0 | \n",
+ " 1 apple + 1 mango fruit bar | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C4053 | \n",
+ " P992 | \n",
+ " 2.855658 | \n",
+ " louisiana | \n",
+ " 24 | \n",
+ " 1 | \n",
+ " outshine simply yogurt bars strawberry | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C4913 | \n",
+ " P14341 | \n",
+ " 2.773447 | \n",
+ " north carolina | \n",
+ " 28 | \n",
+ " 0 | \n",
+ " vegetarian chili seasoning | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id product_id rating state age is_married \\\n",
+ "0 C2786 P1206 1.199858 new mexico 75 0 \n",
+ "1 C866 P1206 1.424285 delaware 35 1 \n",
+ "2 C2786 P14984 2.513348 new mexico 75 0 \n",
+ "3 C4053 P992 2.855658 louisiana 24 1 \n",
+ "4 C4913 P14341 2.773447 north carolina 28 0 \n",
+ "\n",
+ " product_name \n",
+ "0 creamy caramel filled hard candies \n",
+ "1 creamy caramel filled hard candies \n",
+ "2 1 apple + 1 mango fruit bar \n",
+ "3 outshine simply yogurt bars strawberry \n",
+ "4 vegetarian chili seasoning "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
+ "click_stream_query = click_stream_historical_feature_group.athena_query()\n",
+ "click_stream_historical_table = click_stream_query.table_name\n",
+ "\n",
+ "customers_query = customers_feature_group.athena_query()\n",
+ "customers_table = customers_query.table_name\n",
+ "\n",
+ "products_query = products_feature_group.athena_query()\n",
+ "products_table = products_query.table_name\n",
+ "\n",
+ "\n",
"query = f'''\n",
"select click_stream_customers.customer_id,\n",
" products.product_id,\n",
@@ -189,9 +362,11 @@
"where click_stream_customers.bought = 1\n",
"'''\n",
"\n",
- "df_cf_features, query = query_offline_store(click_stream_feature_group_name, query,\n",
- " sagemaker_session)\n",
- "df_cf_features.head()"
+ "df_cf_features = pd.DataFrame()\n",
+ "click_stream_query.run(query_string=query, output_location='s3://'+default_bucket+'/query_results/')\n",
+ "click_stream_query.wait()\n",
+ "df_cf_features = click_stream_query.as_dataframe()\n",
+ "df_cf_features.head(5)"
]
},
{
@@ -221,8 +396,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 12,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def transform_cf_data(training_df, inference_df=None):\n",
@@ -277,8 +454,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 13,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"X, y = load_dataset(df_cf_features)"
@@ -293,9 +472,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 14,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(65393, 28366) (16349, 28366) (65393,) (16349,)\n"
+ ]
+ }
+ ],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
@@ -320,9 +509,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 15,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "s3://sagemaker-ap-southeast-2-XXXXXXXXXXXX/recsys/train/train.protobuf\n",
+ "s3://sagemaker-ap-southeast-2-XXXXXXXXXXXX/recsys/test/test.protobuf\n",
+ "Output: s3://sagemaker-ap-southeast-2-XXXXXXXXXXXX/recsys/output\n"
+ ]
+ }
+ ],
"source": [
"def write_dataset_to_protobuf(X, y, bucket, prefix, key):\n",
" \"\"\"\n",
@@ -352,8 +553,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 16,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# Add variables to be saved for later notebooks\n",
@@ -385,8 +588,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 17,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"container = sagemaker.image_uris.retrieve(\"factorization-machines\", region=region)\n",
@@ -420,17 +625,305 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 18,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:sagemaker:Creating training-job with name: factorization-machines-2024-05-29-00-35-05-125\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-05-29 00:35:05 Starting - Starting the training job...\n",
+ "2024-05-29 00:35:21 Starting - Preparing the instances for training...\n",
+ "2024-05-29 00:35:58 Downloading - Downloading the training image.....................\n",
+ "2024-05-29 00:39:24 Training - Training image download completed. Training in progress..\u001b[34mDocker entrypoint called with argument(s): train\u001b[0m\n",
+ "\u001b[34mRunning default environment configuration script\u001b[0m\n",
+ "\u001b[34m/opt/amazon/lib/python3.8/site-packages/mxnet/model.py:97: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n",
+ " if num_device is 1 and 'dist' not in kvstore:\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linear_init_sigma': '0.01', 'factors_init_method': 'normal', 'factors_init_sigma': '0.001', 'batch_metrics_publish_interval': '500', '_data_format': 'record', '_kvstore': 'auto', '_learning_rate': '1.0', '_log_level': 'info', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_optimizer': 'adam', '_tuning_objective_metric': '', '_use_full_symbolic': 'true', '_wd': '1.0'}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Merging with provided configuration from /opt/ml/input/config/hyperparameters.json: {'epochs': '20', 'feature_dim': '28366', 'mini_batch_size': '1000', 'num_factors': '64', 'predictor_type': 'regressor'}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Final configuration: {'epochs': '20', 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linear_init_sigma': '0.01', 'factors_init_method': 'normal', 'factors_init_sigma': '0.001', 'batch_metrics_publish_interval': '500', '_data_format': 'record', '_kvstore': 'auto', '_learning_rate': '1.0', '_log_level': 'info', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_optimizer': 'adam', '_tuning_objective_metric': '', '_use_full_symbolic': 'true', '_wd': '1.0', 'feature_dim': '28366', 'num_factors': '64', 'predictor_type': 'regressor'}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 WARNING 140627447908160] Loggers have already been setup.\u001b[0m\n",
+ "\u001b[34mProcess 6 is a worker.\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Using default worker.\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Checkpoint loading and saving are disabled.\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:33.034] [tensorio] [warning] TensorIO is already initialized; ignoring the initialization routine.\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:33.037] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 0, \"duration\": 8, \"num_examples\": 1, \"num_bytes\": 111048}\u001b[0m\n",
+ "\u001b[34m/opt/amazon/python3.8/lib/python3.8/subprocess.py:848: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used\n",
+ " self.stdout = io.open(c2pread, 'rb', bufsize)\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] nvidia-smi: took 0.030 seconds to run.\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] nvidia-smi identified 0 GPUs.\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Number of GPUs being used: 0\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] [Sparse network] Building a sparse network.\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Create Store: local\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943173.0295057, \"EndTime\": 1716943173.070657, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"initialize.time\": {\"sum\": 33.90049934387207, \"count\": 1, \"min\": 33.90049934387207, \"max\": 33.90049934387207}}}\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943173.0707624, \"EndTime\": 1716943173.070795, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"Meta\": \"init_train_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1000.0, \"count\": 1, \"min\": 1000, \"max\": 1000}, \"Total Batches Seen\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Max Records Seen Between Resets\": {\"sum\": 1000.0, \"count\": 1, \"min\": 1000, \"max\": 1000}, \"Max Batches Seen Between Resets\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Reset Count\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Number of Records Since Last Reset\": {\"sum\": 0.0, \"count\": 1, \"min\": 0, \"max\": 0}, \"Number of Batches Since Last Reset\": {\"sum\": 0.0, \"count\": 1, \"min\": 0, \"max\": 0}}}\u001b[0m\n",
+ "\u001b[34m[00:39:33] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.3.x_Cuda_11.1.x.404.0/AL2_x86_64/generic-flavor/src/src/kvstore/./kvstore_local.h:306: Warning: non-default weights detected during kvstore pull. This call has been ignored. Please make sure to use kv.row_sparse_pull() or module.prepare() with row_ids.\u001b[0m\n",
+ "\u001b[34m[00:39:33] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.3.x_Cuda_11.1.x.404.0/AL2_x86_64/generic-flavor/src/src/kvstore/./kvstore_local.h:306: Warning: non-default weights detected during kvstore pull. This call has been ignored. Please make sure to use kv.row_sparse_pull() or module.prepare() with row_ids.\u001b[0m\n",
+ "\u001b[34m[00:39:33] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.3.x_Cuda_11.1.x.404.0/AL2_x86_64/generic-flavor/src/src/operator/././../common/utils.h:450: Optimizer with lazy_update = True detected. Be aware that lazy update with row_sparse gradient is different from standard update, and may lead to different empirical results. See https://mxnet.incubator.apache.org/api/python/optimization/optimization.html for more details.\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, batch=0 train rmse =2.3243699793236985\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, batch=0 train mse =5.40269580078125\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, batch=0 train absolute_loss =2.219640380859375\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:33.930] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 2, \"duration\": 769, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, train rmse =1.04838653056041\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, train mse =1.0991143174604936\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, train absolute_loss =0.8338966582327179\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943173.0707183, \"EndTime\": 1716943173.931281, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"epochs\": {\"sum\": 20.0, \"count\": 1, \"min\": 20, \"max\": 20}, \"update.time\": {\"sum\": 860.2795600891113, \"count\": 1, \"min\": 860.2795600891113, \"max\": 860.2795600891113}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #progress_metric: host=algo-1, completed 5.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943173.0709755, \"EndTime\": 1716943173.9314995, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 0, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 66393.0, \"count\": 1, \"min\": 66393, \"max\": 66393}, \"Total Batches Seen\": {\"sum\": 67.0, \"count\": 1, \"min\": 67, \"max\": 67}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 2.0, \"count\": 1, \"min\": 2, \"max\": 2}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=75985.0648132552 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, batch=0 train rmse =0.699270533173287\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, batch=0 train mse =0.4889792785644531\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, batch=0 train absolute_loss =0.5935192260742187\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:34.948] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 4, \"duration\": 1015, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, train rmse =0.7150839339986994\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, train mse =0.5113450326630563\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, train absolute_loss =0.6103684655391809\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943173.9313607, \"EndTime\": 1716943174.9494588, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 1017.7371501922607, \"count\": 1, \"min\": 1017.7371501922607, \"max\": 1017.7371501922607}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #progress_metric: host=algo-1, completed 10.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943173.9316964, \"EndTime\": 1716943174.949821, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 1, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 131786.0, \"count\": 1, \"min\": 131786, \"max\": 131786}, \"Total Batches Seen\": {\"sum\": 133.0, \"count\": 1, \"min\": 133, \"max\": 133}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 3.0, \"count\": 1, \"min\": 3, \"max\": 3}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=64222.65226103779 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, batch=0 train rmse =0.6905555103261409\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, batch=0 train mse =0.4768669128417969\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, batch=0 train absolute_loss =0.586952880859375\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:35.908] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 6, \"duration\": 952, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, train rmse =0.7090962377316878\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, train mse =0.5028174743652344\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, train absolute_loss =0.6054784185236151\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943174.949536, \"EndTime\": 1716943175.9089746, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 958.7352275848389, \"count\": 1, \"min\": 958.7352275848389, \"max\": 958.7352275848389}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #progress_metric: host=algo-1, completed 15.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943174.950213, \"EndTime\": 1716943175.9092712, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 2, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 197179.0, \"count\": 1, \"min\": 197179, \"max\": 197179}, \"Total Batches Seen\": {\"sum\": 199.0, \"count\": 1, \"min\": 199, \"max\": 199}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 4.0, \"count\": 1, \"min\": 4, \"max\": 4}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=68177.25548425746 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, batch=0 train rmse =0.6858606609154707\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, batch=0 train mse =0.47040484619140627\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, batch=0 train absolute_loss =0.5828222045898438\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:36.728] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 8, \"duration\": 817, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, train rmse =0.7023911057760747\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, train mse =0.4933532654733369\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, train absolute_loss =0.5996325369170218\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943175.9090288, \"EndTime\": 1716943176.729298, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 819.7922706604004, \"count\": 1, \"min\": 819.7922706604004, \"max\": 819.7922706604004}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #progress_metric: host=algo-1, completed 20.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943175.9094827, \"EndTime\": 1716943176.7295601, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 3, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 262572.0, \"count\": 1, \"min\": 262572, \"max\": 262572}, \"Total Batches Seen\": {\"sum\": 265.0, \"count\": 1, \"min\": 265, \"max\": 265}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 5.0, \"count\": 1, \"min\": 5, \"max\": 5}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=79730.76046829497 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, batch=0 train rmse =0.679621180313319\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, batch=0 train mse =0.46188494873046876\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, batch=0 train absolute_loss =0.5771847534179687\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:37.503] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 10, \"duration\": 771, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, train rmse =0.6938342617224935\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, train mse =0.48140598273999763\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, train absolute_loss =0.5921244959975734\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943176.7293534, \"EndTime\": 1716943177.5039217, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 774.1501331329346, \"count\": 1, \"min\": 774.1501331329346, \"max\": 774.1501331329346}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #progress_metric: host=algo-1, completed 25.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943176.729749, \"EndTime\": 1716943177.504056, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 4, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 327965.0, \"count\": 1, \"min\": 327965, \"max\": 327965}, \"Total Batches Seen\": {\"sum\": 331.0, \"count\": 1, \"min\": 331, \"max\": 331}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 6.0, \"count\": 1, \"min\": 6, \"max\": 6}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=84444.792460138 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, batch=0 train rmse =0.6723749729489096\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, batch=0 train mse =0.45208810424804685\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, batch=0 train absolute_loss =0.5705239868164063\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:38.292] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 12, \"duration\": 783, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, train rmse =0.6841473968757971\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, train mse =0.46805766065192944\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, train absolute_loss =0.5835006454930161\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943177.5039735, \"EndTime\": 1716943178.293152, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 788.9235019683838, \"count\": 1, \"min\": 788.9235019683838, \"max\": 788.9235019683838}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #progress_metric: host=algo-1, completed 30.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943177.5042048, \"EndTime\": 1716943178.293383, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 5, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 393358.0, \"count\": 1, \"min\": 393358, \"max\": 393358}, \"Total Batches Seen\": {\"sum\": 397.0, \"count\": 1, \"min\": 397, \"max\": 397}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 7.0, \"count\": 1, \"min\": 7, \"max\": 7}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=82852.19188322074 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, batch=0 train rmse =0.664622994159271\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, batch=0 train mse =0.4417237243652344\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, batch=0 train absolute_loss =0.5635284423828125\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:39.068] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 14, \"duration\": 770, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, train rmse =0.6739215477205758\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, train mse =0.45417025248209636\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, train absolute_loss =0.5742900714296283\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943178.2932088, \"EndTime\": 1716943179.069036, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 775.421142578125, \"count\": 1, \"min\": 775.421142578125, \"max\": 775.421142578125}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #progress_metric: host=algo-1, completed 35.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943178.2935927, \"EndTime\": 1716943179.069173, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 6, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 458751.0, \"count\": 1, \"min\": 458751, \"max\": 458751}, \"Total Batches Seen\": {\"sum\": 463.0, \"count\": 1, \"min\": 463, \"max\": 463}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 8.0, \"count\": 1, \"min\": 8, \"max\": 8}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=84306.73052890941 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, batch=0 train rmse =0.6566801510635001\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, batch=0 train mse =0.43122882080078123\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, batch=0 train absolute_loss =0.5564387817382812\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:39.834] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 16, \"duration\": 763, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, train rmse =0.6635769646690042\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, train mse =0.44033438803932884\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, train absolute_loss =0.564879950321082\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943179.0690897, \"EndTime\": 1716943179.8349438, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 765.5997276306152, \"count\": 1, \"min\": 765.5997276306152, \"max\": 765.5997276306152}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #progress_metric: host=algo-1, completed 40.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943179.0693188, \"EndTime\": 1716943179.8351235, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 7, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 524144.0, \"count\": 1, \"min\": 524144, \"max\": 524144}, \"Total Batches Seen\": {\"sum\": 529.0, \"count\": 1, \"min\": 529, \"max\": 529}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 9.0, \"count\": 1, \"min\": 9, \"max\": 9}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=85380.07795078939 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, batch=0 train rmse =0.6487400461363069\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, batch=0 train mse =0.4208636474609375\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, batch=0 train absolute_loss =0.5494663696289063\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:40.590] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 18, \"duration\": 753, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, train rmse =0.6534163943720617\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, train mse =0.4269529844341856\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, train absolute_loss =0.5555228363961885\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943179.835001, \"EndTime\": 1716943180.591622, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 756.2096118927002, \"count\": 1, \"min\": 756.2096118927002, \"max\": 756.2096118927002}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #progress_metric: host=algo-1, completed 45.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943179.8353837, \"EndTime\": 1716943180.591916, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 8, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 589537.0, \"count\": 1, \"min\": 589537, \"max\": 589537}, \"Total Batches Seen\": {\"sum\": 595.0, \"count\": 1, \"min\": 595, \"max\": 595}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 10.0, \"count\": 1, \"min\": 10, \"max\": 10}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=86422.05922478618 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, batch=0 train rmse =0.6409521363668906\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, batch=0 train mse =0.41081964111328123\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, batch=0 train absolute_loss =0.5427968139648438\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:41.368] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 20, \"duration\": 771, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, train rmse =0.6436583883131721\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, train mse =0.4142961208459103\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, train absolute_loss =0.5464532059178208\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943180.5917187, \"EndTime\": 1716943181.3686094, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 776.268482208252, \"count\": 1, \"min\": 776.268482208252, \"max\": 776.268482208252}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #progress_metric: host=algo-1, completed 50.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943180.592315, \"EndTime\": 1716943181.3688226, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 9, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 654930.0, \"count\": 1, \"min\": 654930, \"max\": 654930}, \"Total Batches Seen\": {\"sum\": 661.0, \"count\": 1, \"min\": 661, \"max\": 661}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 11.0, \"count\": 1, \"min\": 11, \"max\": 11}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=84197.7997148159 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, batch=0 train rmse =0.6334473089090067\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, batch=0 train mse =0.4012554931640625\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, batch=0 train absolute_loss =0.5361207885742187\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:42.121] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 22, \"duration\": 749, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, train rmse =0.6344510547758051\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, train mse =0.40252814090613165\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, train absolute_loss =0.5378079723011364\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943181.3686695, \"EndTime\": 1716943182.1223965, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 753.2670497894287, \"count\": 1, \"min\": 753.2670497894287, \"max\": 753.2670497894287}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #progress_metric: host=algo-1, completed 55.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943181.3691046, \"EndTime\": 1716943182.1225317, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 10, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 720323.0, \"count\": 1, \"min\": 720323, \"max\": 720323}, \"Total Batches Seen\": {\"sum\": 727.0, \"count\": 1, \"min\": 727, \"max\": 727}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 12.0, \"count\": 1, \"min\": 12, \"max\": 12}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=86780.1220245459 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, batch=0 train rmse =0.6263321496378115\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, batch=0 train mse =0.39229196166992186\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, batch=0 train absolute_loss =0.5296707763671875\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:42.936] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 24, \"duration\": 812, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, train rmse =0.6258821093660648\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, train mse =0.3917284148245147\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, train absolute_loss =0.5296877159349846\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943182.1224484, \"EndTime\": 1716943182.9367995, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 814.0368461608887, \"count\": 1, \"min\": 814.0368461608887, \"max\": 814.0368461608887}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #progress_metric: host=algo-1, completed 60.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943182.1227415, \"EndTime\": 1716943182.9370315, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 11, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 785716.0, \"count\": 1, \"min\": 785716, \"max\": 785716}, \"Total Batches Seen\": {\"sum\": 793.0, \"count\": 1, \"min\": 793, \"max\": 793}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 13.0, \"count\": 1, \"min\": 13, \"max\": 13}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=80297.43202556841 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, batch=0 train rmse =0.6196789851832434\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, batch=0 train mse =0.38400204467773436\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, batch=0 train absolute_loss =0.5234930419921875\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:43.692] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 26, \"duration\": 753, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, train rmse =0.6179902861519048\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, train mse =0.3819119937781132\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, train absolute_loss =0.5221529467033618\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943182.93689, \"EndTime\": 1716943183.692754, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 755.4678916931152, \"count\": 1, \"min\": 755.4678916931152, \"max\": 755.4678916931152}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #progress_metric: host=algo-1, completed 65.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943182.9372606, \"EndTime\": 1716943183.6929443, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 12, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 851109.0, \"count\": 1, \"min\": 851109, \"max\": 851109}, \"Total Batches Seen\": {\"sum\": 859.0, \"count\": 1, \"min\": 859, \"max\": 859}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 14.0, \"count\": 1, \"min\": 14, \"max\": 14}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=86523.25777123167 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, batch=0 train rmse =0.613523563793664\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, batch=0 train mse =0.3764111633300781\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, batch=0 train absolute_loss =0.5177263793945313\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:44.538] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 28, \"duration\": 840, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, train rmse =0.610777409585509\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, train mse =0.3730490440599846\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, train absolute_loss =0.5152088220769708\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943183.6928098, \"EndTime\": 1716943184.538854, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 845.649242401123, \"count\": 1, \"min\": 845.649242401123, \"max\": 845.649242401123}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #progress_metric: host=algo-1, completed 70.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943183.6931825, \"EndTime\": 1716943184.5389938, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 13, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 916502.0, \"count\": 1, \"min\": 916502, \"max\": 916502}, \"Total Batches Seen\": {\"sum\": 925.0, \"count\": 1, \"min\": 925, \"max\": 925}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 15.0, \"count\": 1, \"min\": 15, \"max\": 15}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=77306.3238888831 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, batch=0 train rmse =0.6078711662320628\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, batch=0 train mse =0.3695073547363281\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, batch=0 train absolute_loss =0.5121652221679688\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:45.430] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 30, \"duration\": 890, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, train rmse =0.6042197229573179\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, train mse =0.3650814736106179\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, train absolute_loss =0.5088311596494732\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943184.5389092, \"EndTime\": 1716943185.4312375, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 892.073392868042, \"count\": 1, \"min\": 892.073392868042, \"max\": 892.073392868042}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #progress_metric: host=algo-1, completed 75.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943184.5391417, \"EndTime\": 1716943185.4313726, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 14, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 981895.0, \"count\": 1, \"min\": 981895, \"max\": 981895}, \"Total Batches Seen\": {\"sum\": 991.0, \"count\": 1, \"min\": 991, \"max\": 991}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 16.0, \"count\": 1, \"min\": 16, \"max\": 16}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=73285.19445424894 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, batch=0 train rmse =0.6027058179398268\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, batch=0 train mse =0.36325430297851563\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, batch=0 train absolute_loss =0.5068946533203125\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:46.285] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 32, \"duration\": 849, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, train rmse =0.598277434069688\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, train mse =0.35793588811700994\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, train absolute_loss =0.5029859531286991\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943185.4312897, \"EndTime\": 1716943186.2860541, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 854.485034942627, \"count\": 1, \"min\": 854.485034942627, \"max\": 854.485034942627}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #progress_metric: host=algo-1, completed 80.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943185.4315462, \"EndTime\": 1716943186.2862678, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 15, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1047288.0, \"count\": 1, \"min\": 1047288, \"max\": 1047288}, \"Total Batches Seen\": {\"sum\": 1057.0, \"count\": 1, \"min\": 1057, \"max\": 1057}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=76500.53174537209 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, batch=0 train rmse =0.597998982610443\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, batch=0 train mse =0.357602783203125\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, batch=0 train absolute_loss =0.5020321655273438\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:47.030] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 34, \"duration\": 742, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, train rmse =0.5929021012345286\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, train mse =0.35153290164831913\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, train absolute_loss =0.4976553497314453\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943186.2861469, \"EndTime\": 1716943187.031127, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 744.6818351745605, \"count\": 1, \"min\": 744.6818351745605, \"max\": 744.6818351745605}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #progress_metric: host=algo-1, completed 85.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943186.2864232, \"EndTime\": 1716943187.031272, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 16, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1112681.0, \"count\": 1, \"min\": 1112681, \"max\": 1112681}, \"Total Batches Seen\": {\"sum\": 1123.0, \"count\": 1, \"min\": 1123, \"max\": 1123}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 18.0, \"count\": 1, \"min\": 18, \"max\": 18}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=87784.25226271317 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, batch=0 train rmse =0.5937158707296362\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, batch=0 train mse =0.35249853515625\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, batch=0 train absolute_loss =0.49739056396484377\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:47.781] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 36, \"duration\": 748, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, train rmse =0.5880419338814606\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, train mse =0.3457933160030481\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, train absolute_loss =0.49279525710597183\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943187.031182, \"EndTime\": 1716943187.7815292, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 750.0548362731934, \"count\": 1, \"min\": 750.0548362731934, \"max\": 750.0548362731934}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #progress_metric: host=algo-1, completed 90.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943187.0314512, \"EndTime\": 1716943187.7816916, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 17, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1178074.0, \"count\": 1, \"min\": 1178074, \"max\": 1178074}, \"Total Batches Seen\": {\"sum\": 1189.0, \"count\": 1, \"min\": 1189, \"max\": 1189}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 19.0, \"count\": 1, \"min\": 19, \"max\": 19}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=87153.18146307641 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, batch=0 train rmse =0.5898197946294433\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, batch=0 train mse =0.34788739013671877\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, batch=0 train absolute_loss =0.49295233154296875\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:48.601] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 38, \"duration\": 817, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, train rmse =0.5836454162716652\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, train mse =0.3406419719349254\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, train absolute_loss =0.4883592219497218\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943187.7815838, \"EndTime\": 1716943188.6016605, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 819.7531700134277, \"count\": 1, \"min\": 819.7531700134277, \"max\": 819.7531700134277}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #progress_metric: host=algo-1, completed 95.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943187.7818813, \"EndTime\": 1716943188.6018543, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 18, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1243467.0, \"count\": 1, \"min\": 1243467, \"max\": 1243467}, \"Total Batches Seen\": {\"sum\": 1255.0, \"count\": 1, \"min\": 1255, \"max\": 1255}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 20.0, \"count\": 1, \"min\": 20, \"max\": 20}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=79740.61198809523 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, batch=0 train rmse =0.5862745645090143\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, batch=0 train mse =0.3437178649902344\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, batch=0 train absolute_loss =0.48873703002929686\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:49.379] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 40, \"duration\": 775, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, train rmse =0.5796635500736886\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, train mse =0.3360098312840317\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, train absolute_loss =0.4843095837217389\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, train rmse =0.5796635500736886\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, train mse =0.3360098312840317\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, train absolute_loss =0.4843095837217389\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943188.6017206, \"EndTime\": 1716943189.3805523, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 778.4221172332764, \"count\": 1, \"min\": 778.4221172332764, \"max\": 778.4221172332764}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #progress_metric: host=algo-1, completed 100.0 % of epochs\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943188.6021047, \"EndTime\": 1716943189.3807492, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 19, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1308860.0, \"count\": 1, \"min\": 1308860, \"max\": 1308860}, \"Total Batches Seen\": {\"sum\": 1321.0, \"count\": 1, \"min\": 1321, \"max\": 1321}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 21.0, \"count\": 1, \"min\": 21, \"max\": 21}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=83971.73006032793 records/second\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 WARNING 140627447908160] wait_for_all_workers will not sync workers since the kv store is not running distributed\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] Pulling entire model from kvstore to finalize\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943189.380624, \"EndTime\": 1716943189.386687, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"finalize.time\": {\"sum\": 5.669116973876953, \"count\": 1, \"min\": 5.669116973876953, \"max\": 5.669116973876953}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] Saved checkpoint to \"/tmp/tmpvigxala1/state-0001.params\"\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:49.419] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/test\", \"epoch\": 0, \"duration\": 16385, \"num_examples\": 1, \"num_bytes\": 111960}\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:39:49.532] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/test\", \"epoch\": 1, \"duration\": 112, \"num_examples\": 17, \"num_bytes\": 1828592}\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943189.4191773, \"EndTime\": 1716943189.532194, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"Meta\": \"test_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 16349.0, \"count\": 1, \"min\": 16349, \"max\": 16349}, \"Total Batches Seen\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}, \"Max Records Seen Between Resets\": {\"sum\": 16349.0, \"count\": 1, \"min\": 16349, \"max\": 16349}, \"Max Batches Seen Between Resets\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}, \"Reset Count\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Number of Records Since Last Reset\": {\"sum\": 16349.0, \"count\": 1, \"min\": 16349, \"max\": 16349}, \"Number of Batches Since Last Reset\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}}}\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #test_score (algo-1) : ('rmse', 0.7654671821101969)\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #test_score (algo-1) : ('mse', 0.5859400068877254)\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #test_score (algo-1) : ('absolute_loss', 0.6434210347315538)\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, test rmse =0.7654671821101969\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, test mse =0.5859400068877254\u001b[0m\n",
+ "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, test absolute_loss =0.6434210347315538\u001b[0m\n",
+ "\u001b[34m#metrics {\"StartTime\": 1716943189.3869832, \"EndTime\": 1716943189.532878, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"setuptime\": {\"sum\": 13.551473617553711, \"count\": 1, \"min\": 13.551473617553711, \"max\": 13.551473617553711}, \"totaltime\": {\"sum\": 16519.063711166382, \"count\": 1, \"min\": 16519.063711166382, \"max\": 16519.063711166382}}}\u001b[0m\n",
+ "\n",
+ "2024-05-29 00:40:03 Uploading - Uploading generated training model\n",
+ "2024-05-29 00:40:03 Completed - Training job completed\n",
+ "Training seconds: 259\n",
+ "Billable seconds: 259\n"
+ ]
+ }
+ ],
"source": [
"fm.fit({'train': train_data_location, 'test': test_data_location})"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 19,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"training_job_name = fm.latest_training_job.job_name"
@@ -452,9 +945,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 20,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:sagemaker:Creating model with name: factorization-machines-2024-05-29-00-40-17-951\n",
+ "INFO:sagemaker:Creating endpoint-config with name recsys-cf-model-05-29-00-34\n",
+ "INFO:sagemaker:Creating endpoint with name recsys-cf-model-05-29-00-34\n"
+ ]
+ }
+ ],
"source": [
"cf_model_predictor = fm.deploy(\n",
" endpoint_name = cf_model_endpoint_name,\n",
@@ -468,9 +973,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 21,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'recsys-cf-model-05-29-00-34'"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"cf_model_predictor.endpoint_name"
]
@@ -493,9 +1011,113 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 22,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:sagemaker:Query 3839da01-c1b9-45c3-980a-24b1b694198d is being executed.\n",
+ "INFO:sagemaker:Query 3839da01-c1b9-45c3-980a-24b1b694198d successfully executed.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bought | \n",
+ " healthy_activity_last_2m | \n",
+ " product_health_index | \n",
+ " customer_health_index | \n",
+ " product_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0.9 | \n",
+ " 0.250698 | \n",
+ " vitamins_supplements | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0.9 | \n",
+ " 0.099806 | \n",
+ " energy_granola_bars | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 0.3 | \n",
+ " 0.250698 | \n",
+ " packaged_cheese | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.3 | \n",
+ " 0.704001 | \n",
+ " baking_ingredients | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 10 | \n",
+ " 0.3 | \n",
+ " 0.250698 | \n",
+ " packaged_cheese | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bought healthy_activity_last_2m product_health_index \\\n",
+ "0 0 3 0.9 \n",
+ "1 0 1 0.9 \n",
+ "2 1 2 0.3 \n",
+ "3 1 0 0.3 \n",
+ "4 0 10 0.3 \n",
+ "\n",
+ " customer_health_index product_category \n",
+ "0 0.250698 vitamins_supplements \n",
+ "1 0.099806 energy_granola_bars \n",
+ "2 0.250698 packaged_cheese \n",
+ "3 0.704001 baking_ingredients \n",
+ "4 0.250698 packaged_cheese "
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"query = f'''\n",
"select bought,\n",
@@ -545,8 +1167,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 23,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"df_rank_features = pd.concat([df_rank_features, pd.get_dummies(df_rank_features['product_category'], prefix='prod_cat')], axis=1)\n",
@@ -555,9 +1179,246 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 24,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bought | \n",
+ " healthy_activity_last_2m | \n",
+ " product_health_index | \n",
+ " customer_health_index | \n",
+ " prod_cat_baby_food_formula | \n",
+ " prod_cat_baking_ingredients | \n",
+ " prod_cat_candy_chocolate | \n",
+ " prod_cat_chips_pretzels | \n",
+ " prod_cat_cleaning_products | \n",
+ " prod_cat_coffee | \n",
+ " ... | \n",
+ " prod_cat_hair_care | \n",
+ " prod_cat_ice_cream_ice | \n",
+ " prod_cat_juice_nectars | \n",
+ " prod_cat_packaged_cheese | \n",
+ " prod_cat_refrigerated | \n",
+ " prod_cat_soup_broth_bouillon | \n",
+ " prod_cat_spices_seasonings | \n",
+ " prod_cat_tea | \n",
+ " prod_cat_vitamins_supplements | \n",
+ " prod_cat_yogurt | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0.9 | \n",
+ " 0.250698 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0.9 | \n",
+ " 0.099806 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 0.3 | \n",
+ " 0.250698 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.3 | \n",
+ " 0.704001 | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 10 | \n",
+ " 0.3 | \n",
+ " 0.250698 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 24 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bought healthy_activity_last_2m product_health_index \\\n",
+ "0 0 3 0.9 \n",
+ "1 0 1 0.9 \n",
+ "2 1 2 0.3 \n",
+ "3 1 0 0.3 \n",
+ "4 0 10 0.3 \n",
+ "\n",
+ " customer_health_index prod_cat_baby_food_formula \\\n",
+ "0 0.250698 False \n",
+ "1 0.099806 False \n",
+ "2 0.250698 False \n",
+ "3 0.704001 False \n",
+ "4 0.250698 False \n",
+ "\n",
+ " prod_cat_baking_ingredients prod_cat_candy_chocolate \\\n",
+ "0 False False \n",
+ "1 False False \n",
+ "2 False False \n",
+ "3 True False \n",
+ "4 False False \n",
+ "\n",
+ " prod_cat_chips_pretzels prod_cat_cleaning_products prod_cat_coffee ... \\\n",
+ "0 False False False ... \n",
+ "1 False False False ... \n",
+ "2 False False False ... \n",
+ "3 False False False ... \n",
+ "4 False False False ... \n",
+ "\n",
+ " prod_cat_hair_care prod_cat_ice_cream_ice prod_cat_juice_nectars \\\n",
+ "0 False False False \n",
+ "1 False False False \n",
+ "2 False False False \n",
+ "3 False False False \n",
+ "4 False False False \n",
+ "\n",
+ " prod_cat_packaged_cheese prod_cat_refrigerated \\\n",
+ "0 False False \n",
+ "1 False False \n",
+ "2 True False \n",
+ "3 False False \n",
+ "4 True False \n",
+ "\n",
+ " prod_cat_soup_broth_bouillon prod_cat_spices_seasonings prod_cat_tea \\\n",
+ "0 False False False \n",
+ "1 False False False \n",
+ "2 False False False \n",
+ "3 False False False \n",
+ "4 False False False \n",
+ "\n",
+ " prod_cat_vitamins_supplements prod_cat_yogurt \n",
+ "0 True False \n",
+ "1 False False \n",
+ "2 False False \n",
+ "3 False False \n",
+ "4 False False \n",
+ "\n",
+ "[5 rows x 24 columns]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df_rank_features.head()"
]
@@ -571,9 +1432,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 25,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.10/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead.\n",
+ " return bound(*args, **kwds)\n"
+ ]
+ }
+ ],
"source": [
"train_data, validation_data, _ = np.split(df_rank_features.sample(frac=1, random_state=1729), [int(0.7 * len(df_rank_features)), int(0.9 * len(df_rank_features))])\n",
"train_data.to_csv('train.csv', header=False, index=False)\n",
@@ -589,8 +1461,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 26,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"boto3.Session().resource('s3').Bucket(default_bucket).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')\n",
@@ -619,11 +1493,157 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {
- "scrolled": true
+ "scrolled": true,
+ "tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n",
+ "INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-05-29-00-40-27-642\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-05-29 00:40:27 Starting - Starting the training job...\n",
+ "2024-05-29 00:40:43 Starting - Preparing the instances for training...\n",
+ "2024-05-29 00:41:13 Downloading - Downloading input data...\n",
+ "2024-05-29 00:41:33 Downloading - Downloading the training image......\n",
+ "2024-05-29 00:42:34 Training - Training image download completed. Training in progress..\u001b[34m[2024-05-29 00:42:51.157 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Imported framework sagemaker_xgboost_container.training\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.\u001b[0m\n",
+ "\u001b[34mReturning the value itself\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] No GPUs detected (normal if no gpus installed)\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Running XGBoost Sagemaker in algorithm mode\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Single node training.\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Train matrix has 139965 rows and 23 columns\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Validation matrix has 39990 rows\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.475 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO json_config.py:91] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.476 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:201] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.476 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO profiler_config_parser.py:102] User has disabled profiler.\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.477 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:255] Saving to /opt/ml/output/tensors\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.477 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.\u001b[0m\n",
+ "\u001b[34m[2024-05-29:00:42:51:INFO] Debug hook created from config\u001b[0m\n",
+ "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 4 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[0]#011train-error:0.06857#011validation-error:0.06824\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.628 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:423] Monitoring the collections: metrics\u001b[0m\n",
+ "\u001b[34m[2024-05-29 00:42:51.632 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:486] Hook is writing from the hook with pid: 7\u001b[0m\n",
+ "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[1]#011train-error:0.06822#011validation-error:0.06864\u001b[0m\n",
+ "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[2]#011train-error:0.06740#011validation-error:0.06917\u001b[0m\n",
+ "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[3]#011train-error:0.06745#011validation-error:0.06917\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[4]#011train-error:0.06751#011validation-error:0.06839\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 36 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[5]#011train-error:0.06785#011validation-error:0.06799\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[6]#011train-error:0.06742#011validation-error:0.06842\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[7]#011train-error:0.06760#011validation-error:0.06774\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[8]#011train-error:0.06757#011validation-error:0.06744\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[9]#011train-error:0.06754#011validation-error:0.06757\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 32 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[10]#011train-error:0.06696#011validation-error:0.06844\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 4 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[11]#011train-error:0.06698#011validation-error:0.06869\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 22 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[12]#011train-error:0.06699#011validation-error:0.06877\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[13]#011train-error:0.06689#011validation-error:0.06832\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 34 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[14]#011train-error:0.06695#011validation-error:0.06822\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[15]#011train-error:0.06667#011validation-error:0.06832\u001b[0m\n",
+ "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[16]#011train-error:0.06672#011validation-error:0.06817\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 36 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[17]#011train-error:0.06657#011validation-error:0.06807\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 0 extra nodes, 12 pruned nodes, max_depth=0\u001b[0m\n",
+ "\u001b[34m[18]#011train-error:0.06657#011validation-error:0.06807\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[19]#011train-error:0.06667#011validation-error:0.06819\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[20]#011train-error:0.06650#011validation-error:0.06834\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[21]#011train-error:0.06630#011validation-error:0.06879\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[22]#011train-error:0.06617#011validation-error:0.06882\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[23]#011train-error:0.06613#011validation-error:0.06862\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[24]#011train-error:0.06610#011validation-error:0.06864\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 38 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[25]#011train-error:0.06610#011validation-error:0.06929\u001b[0m\n",
+ "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[26]#011train-error:0.06607#011validation-error:0.06944\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[27]#011train-error:0.06610#011validation-error:0.06922\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[28]#011train-error:0.06607#011validation-error:0.06939\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[29]#011train-error:0.06607#011validation-error:0.06949\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[30]#011train-error:0.06622#011validation-error:0.06989\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[31]#011train-error:0.06612#011validation-error:0.07002\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 34 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[32]#011train-error:0.06605#011validation-error:0.06967\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 32 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[33]#011train-error:0.06599#011validation-error:0.06952\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 0 extra nodes, 18 pruned nodes, max_depth=0\u001b[0m\n",
+ "\u001b[34m[34]#011train-error:0.06597#011validation-error:0.06947\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[35]#011train-error:0.06589#011validation-error:0.06967\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[36]#011train-error:0.06611#011validation-error:0.06929\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[37]#011train-error:0.06593#011validation-error:0.06982\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[38]#011train-error:0.06589#011validation-error:0.06967\u001b[0m\n",
+ "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 18 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[39]#011train-error:0.06594#011validation-error:0.06969\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 30 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[40]#011train-error:0.06577#011validation-error:0.06984\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 0 extra nodes, 22 pruned nodes, max_depth=0\u001b[0m\n",
+ "\u001b[34m[41]#011train-error:0.06582#011validation-error:0.06982\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[42]#011train-error:0.06562#011validation-error:0.06999\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 4 extra nodes, 22 pruned nodes, max_depth=2\u001b[0m\n",
+ "\u001b[34m[43]#011train-error:0.06562#011validation-error:0.06999\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[44]#011train-error:0.06554#011validation-error:0.06987\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[45]#011train-error:0.06560#011validation-error:0.06947\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 24 pruned nodes, max_depth=4\u001b[0m\n",
+ "\u001b[34m[46]#011train-error:0.06555#011validation-error:0.06957\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[47]#011train-error:0.06557#011validation-error:0.07034\u001b[0m\n",
+ "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[48]#011train-error:0.06561#011validation-error:0.07037\u001b[0m\n",
+ "\u001b[34m[00:42:56] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
+ "\u001b[34m[49]#011train-error:0.06568#011validation-error:0.07027\u001b[0m\n",
+ "\n",
+ "2024-05-29 00:43:18 Uploading - Uploading generated training model\n",
+ "2024-05-29 00:43:18 Completed - Training job completed\n",
+ "Training seconds: 124\n",
+ "Billable seconds: 124\n"
+ ]
+ }
+ ],
"source": [
"container = sagemaker.image_uris.retrieve('xgboost', region, version='1.2-2')\n",
"\n",
@@ -664,9 +1684,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 28,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:sagemaker:Creating model with name: sagemaker-xgboost-2024-05-29-00-43-39-427\n",
+ "INFO:sagemaker:Creating endpoint-config with name recsys-rerank-model-05-29-00-34\n",
+ "INFO:sagemaker:Creating endpoint with name recsys-rerank-model-05-29-00-34\n"
+ ]
+ }
+ ],
"source": [
"xgb_predictor = xgb.deploy(\n",
" endpoint_name = ranking_model_endpoint_name,\n",
@@ -679,9 +1711,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 29,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'recsys-rerank-model-05-29-00-34'"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"xgb_predictor.endpoint_name"
]
@@ -695,8 +1740,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 30,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def top_rated_products_by_customer_state(customer_id, top_n):\n",
@@ -735,9 +1782,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 32,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Stored 'cf_inference_payload' (ndarray)\n",
+ "Stored 'cf_inference_df' (DataFrame)\n",
+ "date and time: 29/05/2024 00:44:47\n"
+ ]
+ }
+ ],
"source": [
"customer_id = 'C3571'\n",
"cf_inference_df = top_rated_products_by_customer_state(customer_id, 15)\n",
@@ -760,14 +1819,619 @@
}
],
"metadata": {
- "instance_type": "ml.t3.medium",
+ "availableInstances": [
+ {
+ "_defaultOrder": 0,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.t3.medium",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 1,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.t3.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 2,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.t3.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 3,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.t3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 4,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 5,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 6,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 7,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 8,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 9,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 10,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 11,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 12,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5d.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 13,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5d.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 14,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5d.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 15,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5d.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 16,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5d.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 17,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5d.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 18,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5d.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 19,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 20,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": true,
+ "memoryGiB": 0,
+ "name": "ml.geospatial.interactive",
+ "supportedImageNames": [
+ "sagemaker-geospatial-v1-0"
+ ],
+ "vcpuNum": 0
+ },
+ {
+ "_defaultOrder": 21,
+ "_isFastLaunch": true,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.c5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 22,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.c5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 23,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.c5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 24,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.c5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 25,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 72,
+ "name": "ml.c5.9xlarge",
+ "vcpuNum": 36
+ },
+ {
+ "_defaultOrder": 26,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 96,
+ "name": "ml.c5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 27,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 144,
+ "name": "ml.c5.18xlarge",
+ "vcpuNum": 72
+ },
+ {
+ "_defaultOrder": 28,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.c5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 29,
+ "_isFastLaunch": true,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g4dn.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 30,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g4dn.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 31,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g4dn.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 32,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g4dn.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 33,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g4dn.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 34,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g4dn.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 35,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 61,
+ "name": "ml.p3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 36,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 244,
+ "name": "ml.p3.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 37,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 488,
+ "name": "ml.p3.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 38,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.p3dn.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 39,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.r5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 40,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.r5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 41,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.r5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 42,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.r5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 43,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.r5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 44,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.r5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 45,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.r5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 46,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.r5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 47,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 48,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 49,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 50,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 51,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 52,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 53,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.g5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 54,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.g5.48xlarge",
+ "vcpuNum": 192
+ },
+ {
+ "_defaultOrder": 55,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 56,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4de.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 57,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.trn1.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 58,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1.32xlarge",
+ "vcpuNum": 128
+ },
+ {
+ "_defaultOrder": 59,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1n.32xlarge",
+ "vcpuNum": 128
+ }
+ ],
+ "instance_type": "ml.m5.large",
"interpreter": {
"hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322"
},
"kernelspec": {
- "display_name": "Python 3 (Data Science)",
+ "display_name": "Python 3 (Data Science 3.0)",
"language": "python",
- "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
+ "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1"
},
"language_info": {
"codemirror_mode": {
@@ -779,7 +2443,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.10"
+ "version": "3.10.6"
}
},
"nbformat": 4,
diff --git a/3_click_stream_kinesis.ipynb b/3_click_stream_kinesis.ipynb
index 216671b..a3d1f99 100644
--- a/3_click_stream_kinesis.ipynb
+++ b/3_click_stream_kinesis.ipynb
@@ -33,8 +33,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 6,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"import pandas as pd\n",
@@ -61,8 +63,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 7,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"role = sagemaker.get_execution_role()\n",
@@ -85,8 +89,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 8,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n",
@@ -105,8 +111,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 9,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"parameters = ps.read()\n",
@@ -118,9 +126,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 10,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "date and time: 29/05/2024 00:44:52\n"
+ ]
+ }
+ ],
"source": [
"ps.add({'kinesis_stream_name': kinesis_stream_name,\n",
" 'kinesis_analytics_application_name': kinesis_analytics_application_name})\n",
@@ -143,9 +161,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 11,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Waiting for the Kinesis stream to become active...\n",
+ "ACTIVE\n",
+ "Amazon kinesis stream arn: arn:aws:kinesis:ap-southeast-2:XXXXXXXXXXXX:stream/fs-click-stream-activity-05-29-00-44-52\n"
+ ]
+ }
+ ],
"source": [
"kinesis_client = boto3.client('kinesis')\n",
"kinesis_client.create_stream(StreamName=kinesis_stream_name, ShardCount=1)\n",
@@ -177,8 +207,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 12,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"kda_client = boto3.client('kinesisanalytics')"
@@ -186,8 +218,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 13,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"sql_code = '''\n",
@@ -217,8 +251,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 14,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"kda_input_schema = [{\n",
@@ -270,18 +306,113 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 15,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mjson\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mbase64\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36msubprocess\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mos\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36msys\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mdatetime\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m datetime\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mtime\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mboto3\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mboto3 version: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mboto3.__version__\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mtry\u001b[39;49;00m:\u001b[37m\u001b[39;49;00m\n",
+ " sm = boto3.Session().client(service_name=\u001b[33m\"\u001b[39;49;00m\u001b[33msagemaker\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n",
+ " sm_fs = boto3.Session().client(service_name=\u001b[33m\"\u001b[39;49;00m\u001b[33msagemaker-featurestore-runtime\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mexcept\u001b[39;49;00m:\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mFailed while connecting to SageMaker Feature Store\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mUnexpected error: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00msys.exc_info()[\u001b[34m0\u001b[39;49;00m]\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m# Read Environment Vars\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "CUSTOMER_ACTIVITY_FEATURE_GROUP = os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mclick_stream_feature_group_name\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mdef\u001b[39;49;00m \u001b[32mingest_record\u001b[39;49;00m(\u001b[37m\u001b[39;49;00m\n",
+ " fg_name, customer_id, sum_activity_weight_last_2m, avg_product_health_index_last_2m\u001b[37m\u001b[39;49;00m\n",
+ "):\u001b[37m\u001b[39;49;00m\n",
+ " record = [\u001b[37m\u001b[39;49;00m\n",
+ " {\u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mcustomer_id\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(customer_id)},\u001b[37m\u001b[39;49;00m\n",
+ " {\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33msum_activity_weight_last_2m\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(sum_activity_weight_last_2m),\u001b[37m\u001b[39;49;00m\n",
+ " },\u001b[37m\u001b[39;49;00m\n",
+ " {\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mavg_product_health_index_last_2m\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(avg_product_health_index_last_2m),\u001b[37m\u001b[39;49;00m\n",
+ " },\u001b[37m\u001b[39;49;00m\n",
+ " {\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mevent_time\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(\u001b[36mint\u001b[39;49;00m(\u001b[36mround\u001b[39;49;00m(time.time()))),\u001b[37m\u001b[39;49;00m\n",
+ " },\u001b[37m\u001b[39;49;00m\n",
+ " ]\u001b[37m\u001b[39;49;00m\n",
+ " sm_fs.put_record(FeatureGroupName=fg_name, Record=record)\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[34mreturn\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[34mdef\u001b[39;49;00m \u001b[32mlambda_handler\u001b[39;49;00m(event, context):\u001b[37m\u001b[39;49;00m\n",
+ " inv_id = event[\u001b[33m\"\u001b[39;49;00m\u001b[33minvocationId\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " app_arn = event[\u001b[33m\"\u001b[39;49;00m\u001b[33mapplicationArn\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " records = event[\u001b[33m\"\u001b[39;49;00m\u001b[33mrecords\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[36mprint\u001b[39;49;00m(\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mReceived \u001b[39;49;00m\u001b[33m{\u001b[39;49;00m\u001b[36mlen\u001b[39;49;00m(records)\u001b[33m}\u001b[39;49;00m\u001b[33m records, invocation id: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00minv_id\u001b[33m}\u001b[39;49;00m\u001b[33m, app arn: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mapp_arn\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ " )\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ " ret_records = []\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[34mfor\u001b[39;49;00m rec \u001b[35min\u001b[39;49;00m records:\u001b[37m\u001b[39;49;00m\n",
+ " data = rec[\u001b[33m\"\u001b[39;49;00m\u001b[33mdata\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " agg_data_str = base64.b64decode(data)\u001b[37m\u001b[39;49;00m\n",
+ " agg_data = json.loads(agg_data_str)\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[36mprint\u001b[39;49;00m(agg_data)\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ " customer_id = agg_data[\u001b[33m\"\u001b[39;49;00m\u001b[33mCUSTOMER_ID\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " sum_activity_weight_last_2m = agg_data[\u001b[33m\"\u001b[39;49;00m\u001b[33mSUM_ACTIVITY_WEIGHT_LAST_2M\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " avg_product_health_index_last_2m = agg_data[\u001b[33m\"\u001b[39;49;00m\u001b[33mAVG_PRODUCT_HEALTH_INDEX_LAST_2M\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[36mprint\u001b[39;49;00m(\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mUpdating agg features for customerId: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mcustomer_id\u001b[33m}\u001b[39;49;00m\u001b[33m, Sum of activity weight last 2m: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00msum_activity_weight_last_2m\u001b[33m}\u001b[39;49;00m\u001b[33m, Average product health index last 2m: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mavg_product_health_index_last_2m\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ " )\u001b[37m\u001b[39;49;00m\n",
+ " ingest_record(\u001b[37m\u001b[39;49;00m\n",
+ " CUSTOMER_ACTIVITY_FEATURE_GROUP,\u001b[37m\u001b[39;49;00m\n",
+ " customer_id,\u001b[37m\u001b[39;49;00m\n",
+ " sum_activity_weight_last_2m,\u001b[37m\u001b[39;49;00m\n",
+ " avg_product_health_index_last_2m,\u001b[37m\u001b[39;49;00m\n",
+ " )\u001b[37m\u001b[39;49;00m\n",
+ "\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[37m# Flag each record as being \"Ok\", so that Kinesis won't try to re-send\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
+ " ret_records.append({\u001b[33m\"\u001b[39;49;00m\u001b[33mrecordId\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: rec[\u001b[33m\"\u001b[39;49;00m\u001b[33mrecordId\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m], \u001b[33m\"\u001b[39;49;00m\u001b[33mresult\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mOk\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m})\u001b[37m\u001b[39;49;00m\n",
+ " \u001b[34mreturn\u001b[39;49;00m {\u001b[33m\"\u001b[39;49;00m\u001b[33mrecords\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: ret_records}\u001b[37m\u001b[39;49;00m\n"
+ ]
+ }
+ ],
"source": [
"!pygmentize ./scripts/lambda-stream.py"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 16,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Lambda function arn: arn:aws:lambda:ap-southeast-2:XXXXXXXXXXXX:function:click-stream-aggregator-lambda05-29-00-44-52\n"
+ ]
+ }
+ ],
"source": [
"lambda_function = Lambda(\n",
" function_name=lambda_name,\n",
@@ -295,7 +426,8 @@
"lambda_function_response = lambda_function.create()\n",
"lambda_function_arn = lambda_function_response['FunctionArn']\n",
"\n",
- "print(f'Lambda function arn: {lambda_function_arn}')"
+ "print(f'Lambda function arn: {lambda_function_arn}')\n",
+ "time.sleep(5)\n"
]
},
{
@@ -307,9 +439,56 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 17,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'ResponseMetadata': {'RequestId': '5aeb83d7-25da-4b4b-8cc1-914f3c23273e',\n",
+ " 'HTTPStatusCode': 200,\n",
+ " 'HTTPHeaders': {'date': 'Wed, 29 May 2024 00:45:20 GMT',\n",
+ " 'content-type': 'application/json',\n",
+ " 'content-length': '1573',\n",
+ " 'connection': 'keep-alive',\n",
+ " 'x-amzn-requestid': '5aeb83d7-25da-4b4b-8cc1-914f3c23273e'},\n",
+ " 'RetryAttempts': 0},\n",
+ " 'FunctionName': 'click-stream-aggregator-lambda05-29-00-44-52',\n",
+ " 'FunctionArn': 'arn:aws:lambda:ap-southeast-2:XXXXXXXXXXXX:function:click-stream-aggregator-lambda05-29-00-44-52',\n",
+ " 'Runtime': 'python3.8',\n",
+ " 'Role': 'arn:aws:iam::XXXXXXXXXXXX:role/service-role/AmazonSageMaker-ExecutionRole-20221031T192874',\n",
+ " 'Handler': 'lambda-stream.lambda_handler',\n",
+ " 'CodeSize': 2626,\n",
+ " 'Description': '',\n",
+ " 'Timeout': 600,\n",
+ " 'MemorySize': 10240,\n",
+ " 'LastModified': '2024-05-29T00:45:20.000+0000',\n",
+ " 'CodeSha256': '4MK66qjsy14fTFs3ZPbvrAssA13JG6abyFNz27wqlfI=',\n",
+ " 'Version': '$LATEST',\n",
+ " 'Environment': {'Variables': {'click_stream_feature_group_name': 'recsys-click-stream-fg-05-29-00-10'}},\n",
+ " 'TracingConfig': {'Mode': 'PassThrough'},\n",
+ " 'RevisionId': '68980d47-1acc-48a5-b09e-2d8675919a95',\n",
+ " 'Layers': [],\n",
+ " 'State': 'Active',\n",
+ " 'LastUpdateStatus': 'InProgress',\n",
+ " 'LastUpdateStatusReason': 'The function is being created.',\n",
+ " 'LastUpdateStatusReasonCode': 'Creating',\n",
+ " 'PackageType': 'Zip',\n",
+ " 'Architectures': ['x86_64'],\n",
+ " 'EphemeralStorage': {'Size': 512},\n",
+ " 'SnapStart': {'ApplyOn': 'None', 'OptimizationStatus': 'Off'},\n",
+ " 'RuntimeVersionConfig': {'RuntimeVersionArn': 'arn:aws:lambda:ap-southeast-2::runtime:f4a0b40874efd83bc0930836198f794b8c0cea2e4e864a3dab58e98fa481131e'},\n",
+ " 'LoggingConfig': {'LogFormat': 'Text',\n",
+ " 'LogGroup': '/aws/lambda/click-stream-aggregator-lambda05-29-00-44-52'}}"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"lambda_client = boto3.client('lambda')\n",
"lambda_client.update_function_configuration(FunctionName=lambda_name,\n",
@@ -329,9 +508,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 18,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "KDA output schema: [{'LambdaOutput': {'ResourceARN': 'arn:aws:lambda:ap-southeast-2:XXXXXXXXXXXX:function:click-stream-aggregator-lambda05-29-00-44-52', 'RoleARN': 'arn:aws:iam::XXXXXXXXXXXX:role/service-role/AmazonSageMaker-ExecutionRole-20221031T192874'}, 'Name': 'DESTINATION_SQL_STREAM', 'DestinationSchema': {'RecordFormatType': 'JSON'}}]\n"
+ ]
+ }
+ ],
"source": [
"kda_output_schema = [{'LambdaOutput': {'ResourceARN': lambda_function_arn, 'RoleARN': role},\n",
" 'Name': 'DESTINATION_SQL_STREAM',\n",
@@ -348,9 +537,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 19,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "READY\n"
+ ]
+ }
+ ],
"source": [
"creating_app = False\n",
"while not creating_app:\n",
@@ -376,9 +575,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 20,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'ResponseMetadata': {'RequestId': 'e9a391a1-b821-4e67-bdfe-ac274f9df484',\n",
+ " 'HTTPStatusCode': 200,\n",
+ " 'HTTPHeaders': {'x-amzn-requestid': 'e9a391a1-b821-4e67-bdfe-ac274f9df484',\n",
+ " 'content-type': 'application/x-amz-json-1.1',\n",
+ " 'content-length': '2',\n",
+ " 'date': 'Wed, 29 May 2024 00:45:20 GMT'},\n",
+ " 'RetryAttempts': 0}}"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"kda_client.start_application(ApplicationName=kinesis_analytics_application_name,\n",
" InputConfigurations=[{'Id': '1.1',\n",
@@ -395,9 +613,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 21,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Waiting for the Kinesis Application to be in RUNNING state...\n",
+ "Waiting for the Kinesis Application to be in RUNNING state...\n",
+ "Waiting for the Kinesis Application to be in RUNNING state...\n",
+ "RUNNING\n"
+ ]
+ }
+ ],
"source": [
"running_app = False\n",
"while not running_app:\n",
@@ -419,8 +650,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 22,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def generate_click_stream_data(customer_id, product_health_index_low, product_health_index_high):\n",
@@ -491,9 +724,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 23,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'event_time': '2024-05-29T00:46:11.125655', 'customer_id': 'C3571', 'product_id': 'P1155', 'product_category': 'baking_ingredients', 'activity_type': 'added_to_cart', 'activity_weight': 2, 'product_health_index': 0.3}\n",
+ "{'event_time': '2024-05-29T00:46:16.706036', 'customer_id': 'C3571', 'product_id': 'P629', 'product_category': 'packaged_cheese', 'activity_type': 'saved_for_later', 'activity_weight': 2, 'product_health_index': 0.3}\n",
+ "{'event_time': '2024-05-29T00:46:22.111948', 'customer_id': 'C3571', 'product_id': 'P13123', 'product_category': 'baking_ingredients', 'activity_type': 'liked', 'activity_weight': 1, 'product_health_index': 0.3}\n",
+ "{'event_time': '2024-05-29T00:46:27.493559', 'customer_id': 'C3571', 'product_id': 'P14170', 'product_category': 'ice_cream_ice', 'activity_type': 'added_to_wish_list', 'activity_weight': 1, 'product_health_index': 0.1}\n",
+ "{'event_time': '2024-05-29T00:46:32.890340', 'customer_id': 'C3571', 'product_id': 'P10430', 'product_category': 'chips_pretzels', 'activity_type': 'liked', 'activity_weight': 1, 'product_health_index': 0.2}\n",
+ "{'event_time': '2024-05-29T00:46:38.264964', 'customer_id': 'C3571', 'product_id': 'P2970', 'product_category': 'chips_pretzels', 'activity_type': 'saved_for_later', 'activity_weight': 2, 'product_health_index': 0.2}\n"
+ ]
+ }
+ ],
"source": [
"put_records_in_kinesis_stream(inference_customer_id, 0.1, 0.3)\n",
"# It takes 2 minutes for KDA to call lambda to update feature store \n",
@@ -510,9 +758,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 24,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Online feature store data for customer id C3571\n",
+ "Record: {'ResponseMetadata': {'RequestId': '6b91b1a3-af4c-4dcf-a710-d38365702840', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6b91b1a3-af4c-4dcf-a710-d38365702840', 'content-type': 'application/json', 'content-length': '396', 'date': 'Wed, 29 May 2024 00:48:53 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C3571'}, {'FeatureName': 'sum_activity_weight_last_2m', 'ValueAsString': '9'}, {'FeatureName': 'avg_product_health_index_last_2m', 'ValueAsString': '0.23333333333333336'}, {'FeatureName': 'event_time', 'ValueAsString': '1716943709'}]}\n"
+ ]
+ }
+ ],
"source": [
"record = featurestore_runtime.get_record(FeatureGroupName=click_stream_feature_group_name,\n",
" RecordIdentifierValueAsString=inference_customer_id)\n",
@@ -529,14 +788,619 @@
}
],
"metadata": {
- "instance_type": "ml.t3.medium",
+ "availableInstances": [
+ {
+ "_defaultOrder": 0,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.t3.medium",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 1,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.t3.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 2,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.t3.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 3,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.t3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 4,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 5,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 6,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 7,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 8,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 9,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 10,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 11,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 12,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5d.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 13,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5d.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 14,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5d.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 15,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5d.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 16,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5d.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 17,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5d.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 18,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5d.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 19,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 20,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": true,
+ "memoryGiB": 0,
+ "name": "ml.geospatial.interactive",
+ "supportedImageNames": [
+ "sagemaker-geospatial-v1-0"
+ ],
+ "vcpuNum": 0
+ },
+ {
+ "_defaultOrder": 21,
+ "_isFastLaunch": true,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.c5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 22,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.c5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 23,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.c5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 24,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.c5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 25,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 72,
+ "name": "ml.c5.9xlarge",
+ "vcpuNum": 36
+ },
+ {
+ "_defaultOrder": 26,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 96,
+ "name": "ml.c5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 27,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 144,
+ "name": "ml.c5.18xlarge",
+ "vcpuNum": 72
+ },
+ {
+ "_defaultOrder": 28,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.c5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 29,
+ "_isFastLaunch": true,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g4dn.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 30,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g4dn.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 31,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g4dn.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 32,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g4dn.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 33,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g4dn.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 34,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g4dn.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 35,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 61,
+ "name": "ml.p3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 36,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 244,
+ "name": "ml.p3.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 37,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 488,
+ "name": "ml.p3.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 38,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.p3dn.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 39,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.r5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 40,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.r5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 41,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.r5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 42,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.r5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 43,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.r5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 44,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.r5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 45,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.r5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 46,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.r5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 47,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 48,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 49,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 50,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 51,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 52,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 53,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.g5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 54,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.g5.48xlarge",
+ "vcpuNum": 192
+ },
+ {
+ "_defaultOrder": 55,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 56,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4de.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 57,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.trn1.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 58,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1.32xlarge",
+ "vcpuNum": 128
+ },
+ {
+ "_defaultOrder": 59,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1n.32xlarge",
+ "vcpuNum": 128
+ }
+ ],
+ "instance_type": "ml.m5.large",
"interpreter": {
"hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322"
},
"kernelspec": {
- "display_name": "Python 3 (Data Science)",
+ "display_name": "Python 3 (Data Science 3.0)",
"language": "python",
- "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
+ "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1"
},
"language_info": {
"codemirror_mode": {
@@ -548,7 +1412,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.10"
+ "version": "3.10.6"
}
},
"nbformat": 4,
diff --git a/4_realtime_recommendations.ipynb b/4_realtime_recommendations.ipynb
index f731f13..7e90b9f 100644
--- a/4_realtime_recommendations.ipynb
+++ b/4_realtime_recommendations.ipynb
@@ -34,9 +34,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 40,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (2.2.2)\n",
+ "Requirement already satisfied: numpy>=1.22.4 in /opt/conda/lib/python3.10/site-packages (from pandas) (1.26.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas) (2.9.0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas) (2024.1)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
"source": [
"!pip install --upgrade pandas"
]
@@ -50,8 +67,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 41,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"import sagemaker\n",
@@ -77,8 +96,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 42,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"role = sagemaker.get_execution_role()\n",
@@ -92,8 +113,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 43,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"query_results= 'sagemaker-recsys-featurestore-workshop'\n",
@@ -109,8 +132,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 45,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"parameters = ps.read()\n",
@@ -153,8 +178,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 46,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# Make sure model has finished deploying\n",
@@ -171,8 +198,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 47,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# Pass in our cached data as input to the Collaborative Filtering model\n",
@@ -195,9 +224,188 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 48,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " index | \n",
+ " customer_id | \n",
+ " product_id | \n",
+ " state | \n",
+ " age | \n",
+ " is_married | \n",
+ " product_name | \n",
+ " predictions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 6 | \n",
+ " C3571 | \n",
+ " P6176 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " pretzel \\\"shells\\\" | \n",
+ " 1.723941 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8 | \n",
+ " C3571 | \n",
+ " P11086 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " almonds mini nut-thins cheddar cheese | \n",
+ " 1.670082 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " C3571 | \n",
+ " P16823 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " pirouette chocolate fudge creme filled wafers | \n",
+ " 1.640127 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 12 | \n",
+ " C3571 | \n",
+ " P15430 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " organic pork chop seasoning | \n",
+ " 1.639393 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " C3571 | \n",
+ " P6247 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " fruit punch roarin' waters | \n",
+ " 1.587156 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 7 | \n",
+ " C3571 | \n",
+ " P14539 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " chocolate, organic, unsweetened, 100% cacao | \n",
+ " 1.505779 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1 | \n",
+ " C3571 | \n",
+ " P10682 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " mini cakes birthday cake | \n",
+ " 1.501803 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 4 | \n",
+ " C3571 | \n",
+ " P4152 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " white cheddar bunnies | \n",
+ " 1.464058 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 13 | \n",
+ " C3571 | \n",
+ " P7822 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " degreaser | \n",
+ " 1.454165 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 3 | \n",
+ " C3571 | \n",
+ " P5429 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " organic sweet potato puree | \n",
+ " 1.414727 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index customer_id product_id state age is_married \\\n",
+ "0 6 C3571 P6176 maine 35 0 \n",
+ "1 8 C3571 P11086 maine 35 0 \n",
+ "2 2 C3571 P16823 maine 35 0 \n",
+ "3 12 C3571 P15430 maine 35 0 \n",
+ "4 5 C3571 P6247 maine 35 0 \n",
+ "5 7 C3571 P14539 maine 35 0 \n",
+ "6 1 C3571 P10682 maine 35 0 \n",
+ "7 4 C3571 P4152 maine 35 0 \n",
+ "8 13 C3571 P7822 maine 35 0 \n",
+ "9 3 C3571 P5429 maine 35 0 \n",
+ "\n",
+ " product_name predictions \n",
+ "0 pretzel \\\"shells\\\" 1.723941 \n",
+ "1 almonds mini nut-thins cheddar cheese 1.670082 \n",
+ "2 pirouette chocolate fudge creme filled wafers 1.640127 \n",
+ "3 organic pork chop seasoning 1.639393 \n",
+ "4 fruit punch roarin' waters 1.587156 \n",
+ "5 chocolate, organic, unsweetened, 100% cacao 1.505779 \n",
+ "6 mini cakes birthday cake 1.501803 \n",
+ "7 white cheddar bunnies 1.464058 \n",
+ "8 degreaser 1.454165 \n",
+ "9 organic sweet potato puree 1.414727 "
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"cf_inference_df"
]
@@ -225,8 +433,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 49,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"# Make sure model has finished deploying\n",
@@ -249,8 +459,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 50,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"query = f'''\n",
@@ -265,7 +477,11 @@
"df_one_hot_cat_features = pd.DataFrame(one_hot_cat_features)\n",
"df_one_hot_cat_features.columns = ['product_category']\n",
"\n",
- "df_one_hot_cat_features = pd.concat([df_one_hot_cat_features, pd.get_dummies(df_one_hot_cat_features['product_category'], prefix='cat')],axis=1)"
+ "df_one_hot_cat_features = pd.concat([df_one_hot_cat_features, pd.get_dummies(df_one_hot_cat_features['product_category'], prefix='cat')],axis=1)\n",
+ "\n",
+ "# Convert dummy variables to integers\n",
+ "for col in df_one_hot_cat_features.columns[1:]:\n",
+ " df_one_hot_cat_features[col] = df_one_hot_cat_features[col].astype(int)"
]
},
{
@@ -282,8 +498,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 51,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def get_ranking_model_input_data(df, df_one_hot_cat_features):\n",
@@ -295,7 +513,7 @@
" customer_record = featurestore_runtime.get_record(FeatureGroupName=customers_feature_group_name,\n",
" RecordIdentifierValueAsString=customer_id,\n",
" FeatureNames=['customer_health_index'])\n",
- " \n",
+ " print(customer_record)\n",
" customer_health_index = customer_record['Record'][0]['ValueAsString']\n",
" \n",
" # Get product features (instead of looping, you can optionally use\n",
@@ -323,6 +541,7 @@
" RecordIdentifierValueAsString=customer_id,\n",
" FeatureNames=['sum_activity_weight_last_2m',\n",
" 'avg_product_health_index_last_2m'])\n",
+ " print(click_stream_record)\n",
" \n",
" # Calculate healthy_activity_last_2m as this will influence ranking as well\n",
" sum_activity_weight_last_2m = click_stream_record['Record'][0]['ValueAsString']\n",
@@ -365,9 +584,188 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 52,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " index | \n",
+ " customer_id | \n",
+ " product_id | \n",
+ " state | \n",
+ " age | \n",
+ " is_married | \n",
+ " product_name | \n",
+ " predictions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 6 | \n",
+ " C3571 | \n",
+ " P6176 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " pretzel \\\"shells\\\" | \n",
+ " 1.723941 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8 | \n",
+ " C3571 | \n",
+ " P11086 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " almonds mini nut-thins cheddar cheese | \n",
+ " 1.670082 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " C3571 | \n",
+ " P16823 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " pirouette chocolate fudge creme filled wafers | \n",
+ " 1.640127 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 12 | \n",
+ " C3571 | \n",
+ " P15430 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " organic pork chop seasoning | \n",
+ " 1.639393 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " C3571 | \n",
+ " P6247 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " fruit punch roarin' waters | \n",
+ " 1.587156 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 7 | \n",
+ " C3571 | \n",
+ " P14539 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " chocolate, organic, unsweetened, 100% cacao | \n",
+ " 1.505779 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1 | \n",
+ " C3571 | \n",
+ " P10682 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " mini cakes birthday cake | \n",
+ " 1.501803 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 4 | \n",
+ " C3571 | \n",
+ " P4152 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " white cheddar bunnies | \n",
+ " 1.464058 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 13 | \n",
+ " C3571 | \n",
+ " P7822 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " degreaser | \n",
+ " 1.454165 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 3 | \n",
+ " C3571 | \n",
+ " P5429 | \n",
+ " maine | \n",
+ " 35 | \n",
+ " 0 | \n",
+ " organic sweet potato puree | \n",
+ " 1.414727 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index customer_id product_id state age is_married \\\n",
+ "0 6 C3571 P6176 maine 35 0 \n",
+ "1 8 C3571 P11086 maine 35 0 \n",
+ "2 2 C3571 P16823 maine 35 0 \n",
+ "3 12 C3571 P15430 maine 35 0 \n",
+ "4 5 C3571 P6247 maine 35 0 \n",
+ "5 7 C3571 P14539 maine 35 0 \n",
+ "6 1 C3571 P10682 maine 35 0 \n",
+ "7 4 C3571 P4152 maine 35 0 \n",
+ "8 13 C3571 P7822 maine 35 0 \n",
+ "9 3 C3571 P5429 maine 35 0 \n",
+ "\n",
+ " product_name predictions \n",
+ "0 pretzel \\\"shells\\\" 1.723941 \n",
+ "1 almonds mini nut-thins cheddar cheese 1.670082 \n",
+ "2 pirouette chocolate fudge creme filled wafers 1.640127 \n",
+ "3 organic pork chop seasoning 1.639393 \n",
+ "4 fruit punch roarin' waters 1.587156 \n",
+ "5 chocolate, organic, unsweetened, 100% cacao 1.505779 \n",
+ "6 mini cakes birthday cake 1.501803 \n",
+ "7 white cheddar bunnies 1.464058 \n",
+ "8 degreaser 1.454165 \n",
+ "9 organic sweet potato puree 1.414727 "
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"cf_inference_df"
]
@@ -381,15 +779,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 53,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'ResponseMetadata': {'RequestId': 'a8a788d0-13d3-4a21-9e04-5fd505a2cf3a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a8a788d0-13d3-4a21-9e04-5fd505a2cf3a', 'content-type': 'application/json', 'content-length': '131', 'date': 'Wed, 29 May 2024 00:49:52 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'customer_health_index', 'ValueAsString': '0.0952770902420399'}]}\n",
+ "{'ResponseMetadata': {'RequestId': 'b80bfb05-4a85-46a2-9e3a-97698adfd24d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'b80bfb05-4a85-46a2-9e3a-97698adfd24d', 'content-type': 'application/json', 'content-length': '234', 'date': 'Wed, 29 May 2024 00:49:53 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'sum_activity_weight_last_2m', 'ValueAsString': '9'}, {'FeatureName': 'avg_product_health_index_last_2m', 'ValueAsString': '0.23333333333333336'}]}\n"
+ ]
+ }
+ ],
"source": [
"# Construct input data for the ranking model\n",
- "ranking_inference_df = get_ranking_model_input_data(cf_inference_df, df_one_hot_cat_features)\n",
- "\n",
+ "ranking_inference_df = get_ranking_model_input_data(cf_inference_df, df_one_hot_cat_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
"# Get our ranked product recommendations and attach the predictions to the model input\n",
- "ranking_inference_df['propensity_to_buy'] = ranking_model_predictor.predict(ranking_inference_df.to_numpy()).decode('utf-8').split(',')"
+ "# ranking_inference_df['propensity_to_buy'] = ranking_model_predictor.predict(ranking_inference_df.to_numpy()).decode('utf-8').split('\\n')\n",
+ "predictions = ranking_model_predictor.predict(ranking_inference_df.to_numpy()).decode('utf-8').split('\\n')\n",
+ "predictions = [float(p) for p in predictions if p != ''] # Convert to float and remove empty strings\n",
+ "\n",
+ "ranking_inference_df['propensity_to_buy'] = predictions\n"
]
},
{
@@ -401,16 +823,126 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 55,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " product_name | \n",
+ " propensity_to_buy | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " P6176 | \n",
+ " pretzel \\\"shells\\\" | \n",
+ " 0.999014 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " P11086 | \n",
+ " almonds mini nut-thins cheddar cheese | \n",
+ " 0.999014 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " P16823 | \n",
+ " pirouette chocolate fudge creme filled wafers | \n",
+ " 0.999014 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " P6247 | \n",
+ " fruit punch roarin' waters | \n",
+ " 0.999014 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " P4152 | \n",
+ " white cheddar bunnies | \n",
+ " 0.999014 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " P10682 | \n",
+ " mini cakes birthday cake | \n",
+ " 0.999014 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " P5429 | \n",
+ " organic sweet potato puree | \n",
+ " 0.998438 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " P14539 | \n",
+ " chocolate, organic, unsweetened, 100% cacao | \n",
+ " 0.998438 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " P15430 | \n",
+ " organic pork chop seasoning | \n",
+ " 0.439611 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " P7822 | \n",
+ " degreaser | \n",
+ " 0.439611 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " product_id product_name propensity_to_buy\n",
+ "0 P6176 pretzel \\\"shells\\\" 0.999014\n",
+ "1 P11086 almonds mini nut-thins cheddar cheese 0.999014\n",
+ "2 P16823 pirouette chocolate fudge creme filled wafers 0.999014\n",
+ "3 P6247 fruit punch roarin' waters 0.999014\n",
+ "4 P4152 white cheddar bunnies 0.999014\n",
+ "5 P10682 mini cakes birthday cake 0.999014\n",
+ "6 P5429 organic sweet potato puree 0.998438\n",
+ "7 P14539 chocolate, organic, unsweetened, 100% cacao 0.998438\n",
+ "8 P15430 organic pork chop seasoning 0.439611\n",
+ "9 P7822 degreaser 0.439611"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Join all the data back together for inspection\n",
"personalized_recommendations = pd.concat([cf_inference_df[['customer_id', 'product_id', 'product_name']],\n",
" ranking_inference_df[['propensity_to_buy']]], axis=1)\n",
"\n",
"# And sort by propensity to buy\n",
- "personalized_recommendations.sort_values(by='propensity_to_buy', ascending=False)[['product_id','product_name']].reset_index(drop=True).head(5)"
+ "personalized_recommendations.sort_values(by='propensity_to_buy', ascending=False)[['product_id','product_name', 'propensity_to_buy']].reset_index(drop=True).head(10)"
]
},
{
@@ -459,14 +991,619 @@
}
],
"metadata": {
- "instance_type": "ml.t3.medium",
+ "availableInstances": [
+ {
+ "_defaultOrder": 0,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.t3.medium",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 1,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.t3.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 2,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.t3.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 3,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.t3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 4,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 5,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 6,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 7,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 8,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 9,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 10,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 11,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 12,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5d.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 13,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5d.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 14,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5d.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 15,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5d.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 16,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5d.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 17,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5d.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 18,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5d.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 19,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 20,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": true,
+ "memoryGiB": 0,
+ "name": "ml.geospatial.interactive",
+ "supportedImageNames": [
+ "sagemaker-geospatial-v1-0"
+ ],
+ "vcpuNum": 0
+ },
+ {
+ "_defaultOrder": 21,
+ "_isFastLaunch": true,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.c5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 22,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.c5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 23,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.c5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 24,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.c5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 25,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 72,
+ "name": "ml.c5.9xlarge",
+ "vcpuNum": 36
+ },
+ {
+ "_defaultOrder": 26,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 96,
+ "name": "ml.c5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 27,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 144,
+ "name": "ml.c5.18xlarge",
+ "vcpuNum": 72
+ },
+ {
+ "_defaultOrder": 28,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.c5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 29,
+ "_isFastLaunch": true,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g4dn.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 30,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g4dn.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 31,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g4dn.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 32,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g4dn.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 33,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g4dn.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 34,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g4dn.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 35,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 61,
+ "name": "ml.p3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 36,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 244,
+ "name": "ml.p3.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 37,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 488,
+ "name": "ml.p3.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 38,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.p3dn.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 39,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.r5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 40,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.r5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 41,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.r5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 42,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.r5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 43,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.r5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 44,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.r5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 45,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.r5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 46,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.r5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 47,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 48,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 49,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 50,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 51,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 52,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 53,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.g5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 54,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.g5.48xlarge",
+ "vcpuNum": 192
+ },
+ {
+ "_defaultOrder": 55,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 56,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4de.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 57,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.trn1.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 58,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1.32xlarge",
+ "vcpuNum": 128
+ },
+ {
+ "_defaultOrder": 59,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1n.32xlarge",
+ "vcpuNum": 128
+ }
+ ],
+ "instance_type": "ml.m5.large",
"interpreter": {
"hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322"
},
"kernelspec": {
- "display_name": "Python 3 (Data Science)",
+ "display_name": "Python 3 (Data Science 3.0)",
"language": "python",
- "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
+ "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1"
},
"language_info": {
"codemirror_mode": {
@@ -478,7 +1615,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.10"
+ "version": "3.10.6"
}
},
"nbformat": 4,
diff --git a/5_cleanup.ipynb b/5_cleanup.ipynb
index 4ad990b..f232bbd 100644
--- a/5_cleanup.ipynb
+++ b/5_cleanup.ipynb
@@ -27,8 +27,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 29,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"from parameter_store import ParameterStore\n",
@@ -44,8 +46,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 30,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"role = sagemaker.get_execution_role()\n",
@@ -68,8 +72,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 31,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"parameters = ps.read()\n",
@@ -96,9 +102,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 32,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleting feature group: recsys-customers-fg-05-28-23-12\n",
+ "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-customers-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n",
+ "Waiting for Feature Group Deletion\n",
+ "Deleting feature group: recsys-products-fg-05-28-23-12\n",
+ "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-products-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n",
+ "Waiting for Feature Group Deletion\n",
+ "Deleting feature group: recsys-orders-fg-05-28-23-12\n",
+ "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-orders-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n",
+ "Waiting for Feature Group Deletion\n",
+ "Deleting feature group: recsys-click-stream-historical-fg-05-28-23-12\n",
+ "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-click-stream-historical-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n",
+ "Waiting for Feature Group Deletion\n",
+ "Deleting feature group: recsys-click-stream-fg-05-28-23-12\n",
+ "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-click-stream-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n",
+ "Waiting for Feature Group Deletion\n"
+ ]
+ }
+ ],
"source": [
"feature_group_list = [customers_feature_group_name, products_feature_group_name,\n",
" orders_feature_group_name, click_stream_historical_feature_group_name,\n",
@@ -118,8 +148,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 33,
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def clean_up_endpoint(endpoint_name):\n",
@@ -133,9 +165,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 34,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleting endpoint: recsys-cf-model-05-28-23-35\n",
+ "Deleting endpoint configuration : recsys-cf-model-05-28-23-35\n",
+ "Deleting endpoint: recsys-rerank-model-05-28-23-35\n",
+ "Deleting endpoint configuration : recsys-rerank-model-05-28-23-35\n"
+ ]
+ }
+ ],
"source": [
"endpoint_list = [cf_model_endpoint_name, ranking_model_endpoint_name]\n",
"\n",
@@ -152,9 +197,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 35,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'ResponseMetadata': {'RequestId': 'f78be708-b4a7-d2de-a95b-934960b27d21',\n",
+ " 'HTTPStatusCode': 200,\n",
+ " 'HTTPHeaders': {'x-amzn-requestid': 'f78be708-b4a7-d2de-a95b-934960b27d21',\n",
+ " 'x-amz-id-2': 'XP2LB+4JbsvKcU5fzgjjolKNwu/VscIteAKJME7nnFnYkN/XL9mdTJo8RFXmZu0APzfUXz7F0myVLYKFb1SWXj8qrvucOao8',\n",
+ " 'date': 'Wed, 29 May 2024 00:08:33 GMT',\n",
+ " 'content-type': 'application/x-amz-json-1.1',\n",
+ " 'content-length': '0',\n",
+ " 'connection': 'keep-alive'},\n",
+ " 'RetryAttempts': 0}}"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"kinesis_client.delete_stream(StreamName=kinesis_stream_name,\n",
" EnforceConsumerDeletion=True)"
@@ -169,9 +235,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 36,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'ResponseMetadata': {'RequestId': '334ef27e-7207-485b-bb2b-8b97a99da0f8',\n",
+ " 'HTTPStatusCode': 200,\n",
+ " 'HTTPHeaders': {'x-amzn-requestid': '334ef27e-7207-485b-bb2b-8b97a99da0f8',\n",
+ " 'content-type': 'application/x-amz-json-1.1',\n",
+ " 'content-length': '2',\n",
+ " 'date': 'Wed, 29 May 2024 00:08:33 GMT'},\n",
+ " 'RetryAttempts': 0}}"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"response = kinesis_analytics_client.describe_application(ApplicationName=kinesis_analytics_application_name)\n",
"create_ts = response['ApplicationDetail']['CreateTimestamp']\n",
@@ -187,14 +272,619 @@
}
],
"metadata": {
+ "availableInstances": [
+ {
+ "_defaultOrder": 0,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.t3.medium",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 1,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.t3.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 2,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.t3.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 3,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.t3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 4,
+ "_isFastLaunch": true,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 5,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 6,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 7,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 8,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 9,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 10,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 11,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 12,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.m5d.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 13,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.m5d.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 14,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.m5d.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 15,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.m5d.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 16,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.m5d.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 17,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.m5d.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 18,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.m5d.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 19,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.m5d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 20,
+ "_isFastLaunch": false,
+ "category": "General purpose",
+ "gpuNum": 0,
+ "hideHardwareSpecs": true,
+ "memoryGiB": 0,
+ "name": "ml.geospatial.interactive",
+ "supportedImageNames": [
+ "sagemaker-geospatial-v1-0"
+ ],
+ "vcpuNum": 0
+ },
+ {
+ "_defaultOrder": 21,
+ "_isFastLaunch": true,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 4,
+ "name": "ml.c5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 22,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 8,
+ "name": "ml.c5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 23,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.c5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 24,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.c5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 25,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 72,
+ "name": "ml.c5.9xlarge",
+ "vcpuNum": 36
+ },
+ {
+ "_defaultOrder": 26,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 96,
+ "name": "ml.c5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 27,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 144,
+ "name": "ml.c5.18xlarge",
+ "vcpuNum": 72
+ },
+ {
+ "_defaultOrder": 28,
+ "_isFastLaunch": false,
+ "category": "Compute optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.c5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 29,
+ "_isFastLaunch": true,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g4dn.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 30,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g4dn.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 31,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g4dn.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 32,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g4dn.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 33,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g4dn.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 34,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g4dn.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 35,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 61,
+ "name": "ml.p3.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 36,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 244,
+ "name": "ml.p3.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 37,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 488,
+ "name": "ml.p3.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 38,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.p3dn.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 39,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.r5.large",
+ "vcpuNum": 2
+ },
+ {
+ "_defaultOrder": 40,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.r5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 41,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.r5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 42,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.r5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 43,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.r5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 44,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.r5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 45,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.r5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 46,
+ "_isFastLaunch": false,
+ "category": "Memory Optimized",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.r5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 47,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 16,
+ "name": "ml.g5.xlarge",
+ "vcpuNum": 4
+ },
+ {
+ "_defaultOrder": 48,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.g5.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 49,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 64,
+ "name": "ml.g5.4xlarge",
+ "vcpuNum": 16
+ },
+ {
+ "_defaultOrder": 50,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 128,
+ "name": "ml.g5.8xlarge",
+ "vcpuNum": 32
+ },
+ {
+ "_defaultOrder": 51,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 1,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 256,
+ "name": "ml.g5.16xlarge",
+ "vcpuNum": 64
+ },
+ {
+ "_defaultOrder": 52,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 192,
+ "name": "ml.g5.12xlarge",
+ "vcpuNum": 48
+ },
+ {
+ "_defaultOrder": 53,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 4,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 384,
+ "name": "ml.g5.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 54,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 768,
+ "name": "ml.g5.48xlarge",
+ "vcpuNum": 192
+ },
+ {
+ "_defaultOrder": 55,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4d.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 56,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 8,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 1152,
+ "name": "ml.p4de.24xlarge",
+ "vcpuNum": 96
+ },
+ {
+ "_defaultOrder": 57,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 32,
+ "name": "ml.trn1.2xlarge",
+ "vcpuNum": 8
+ },
+ {
+ "_defaultOrder": 58,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1.32xlarge",
+ "vcpuNum": 128
+ },
+ {
+ "_defaultOrder": 59,
+ "_isFastLaunch": false,
+ "category": "Accelerated computing",
+ "gpuNum": 0,
+ "hideHardwareSpecs": false,
+ "memoryGiB": 512,
+ "name": "ml.trn1n.32xlarge",
+ "vcpuNum": 128
+ }
+ ],
"instance_type": "ml.t3.medium",
"interpreter": {
"hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322"
},
"kernelspec": {
- "display_name": "Python 3 (Data Science)",
+ "display_name": "Python 3 (Data Science 3.0)",
"language": "python",
- "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ca-central-1:310906938811:image/datascience-1.0"
+ "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1"
},
"language_info": {
"codemirror_mode": {
@@ -206,7 +896,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.10"
+ "version": "3.10.6"
}
},
"nbformat": 4,