diff --git a/1_feature_store.ipynb b/1_feature_store.ipynb index 03f35bc..37b26c0 100644 --- a/1_feature_store.ipynb +++ b/1_feature_store.ipynb @@ -46,8 +46,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 24, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import sagemaker\n", @@ -71,16 +73,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 25, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ap-southeast-2\n" + ] + } + ], "source": [ "role = sagemaker.get_execution_role()\n", "sagemaker_session = sagemaker.Session()\n", "default_bucket = sagemaker_session.default_bucket()\n", "region = sagemaker_session.boto_region_name\n", "s3_client = boto3.client('s3', region_name=region)\n", - "\n", + "print(region)\n", "# ParameterStore is a custom utility to save local variable values\n", "# for use across all notebooks\n", "ps = ParameterStore(verbose=False)\n", @@ -89,8 +101,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 26, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "prefix = 'recsys-feature-store'\n", @@ -114,7 +128,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "print(f'Using SageMaker version: {sagemaker.__version__}')\n", @@ -124,9 +140,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 28, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature groups names:\n", + "\n", + "recsys-customers-fg-05-29-00-10\n", + "recsys-products-fg-05-29-00-10\n", + "recsys-orders-fg-05-29-00-10\n", + "recsys-click-stream-historical-fg-05-29-00-10\n", + "recsys-click-stream-fg-05-29-00-10\n" + ] + } + ], "source": [ "print('Feature groups names:\\n')\n", "print(customers_feature_group_name)\n", @@ -152,9 +184,111 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 29, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idnamestateageis_marriedcustomer_health_index
0C1justin gutierrezalaska5210.590238
1C2karen crossidaho2910.622201
2C3amy kingoklahoma7010.225476
3C4nicole hartmanmissouri5210.975817
4C5jessica powersminnesota3110.886133
\n", + "
" + ], + "text/plain": [ + " customer_id name state age is_married \\\n", + "0 C1 justin gutierrez alaska 52 1 \n", + "1 C2 karen cross idaho 29 1 \n", + "2 C3 amy king oklahoma 70 1 \n", + "3 C4 nicole hartman missouri 52 1 \n", + "4 C5 jessica powers minnesota 31 1 \n", + "\n", + " customer_health_index \n", + "0 0.590238 \n", + "1 0.622201 \n", + "2 0.225476 \n", + "3 0.975817 \n", + "4 0.886133 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_customers = pd.read_csv('data/customers.csv')\n", "df_customers.head()" @@ -169,9 +303,99 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 30, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_nameproduct_categoryproduct_idproduct_health_index
0chocolate sandwich cookiescookies_cakesP10.1
1nutter butter cookie bites go-pakcookies_cakesP250.1
2danish butter cookiescookies_cakesP340.1
3gluten free all natural chocolate chip cookiescookies_cakesP550.1
4mini nilla wafers munch packcookies_cakesP990.1
\n", + "
" + ], + "text/plain": [ + " product_name product_category product_id \\\n", + "0 chocolate sandwich cookies cookies_cakes P1 \n", + "1 nutter butter cookie bites go-pak cookies_cakes P25 \n", + "2 danish butter cookies cookies_cakes P34 \n", + "3 gluten free all natural chocolate chip cookies cookies_cakes P55 \n", + "4 mini nilla wafers munch pack cookies_cakes P99 \n", + "\n", + " product_health_index \n", + "0 0.1 \n", + "1 0.1 \n", + "2 0.1 \n", + "3 0.1 \n", + "4 0.1 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_products = pd.read_csv('data/products.csv')\n", "df_products.head()" @@ -186,9 +410,86 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 31, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idproduct_idpurchase_amount
0C1P1085287.71
1C1P10940101.71
2C1P1381842.11
3C1P231055.37
4C1P39355.16
\n", + "
" + ], + "text/plain": [ + " customer_id product_id purchase_amount\n", + "0 C1 P10852 87.71\n", + "1 C1 P10940 101.71\n", + "2 C1 P13818 42.11\n", + "3 C1 P2310 55.37\n", + "4 C1 P393 55.16" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_orders = pd.read_csv('data/orders.csv')\n", "df_orders.head()" @@ -203,9 +504,98 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 32, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idproduct_idboughthealthy_activity_last_2mrating
0C1P10852113.048429
1C3806P10852111.674935
2C5257P10852102.691236
3C8220P10852111.773447
4C1P10852093.048429
\n", + "
" + ], + "text/plain": [ + " customer_id product_id bought healthy_activity_last_2m rating\n", + "0 C1 P10852 1 1 3.048429\n", + "1 C3806 P10852 1 1 1.674935\n", + "2 C5257 P10852 1 0 2.691236\n", + "3 C8220 P10852 1 1 1.773447\n", + "4 C1 P10852 0 9 3.048429" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_click_stream_historical = pd.read_csv('data/click_stream_historical.csv')\n", "df_click_stream_historical.head()" @@ -220,9 +610,72 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 33, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idsum_activity_weight_last_2mavg_product_health_index_last_2m
0C0923480.2
1D1928330.1
2C123490.8
\n", + "
" + ], + "text/plain": [ + " customer_id sum_activity_weight_last_2m avg_product_health_index_last_2m\n", + "0 C09234 8 0.2\n", + "1 D19283 3 0.1\n", + "2 C1234 9 0.8" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read a sample in order to have a schema for Feature Group creation\n", "df_click_stream = pd.read_csv('data/click_stream.csv')\n", @@ -247,23 +700,244 @@ "For more information, see [Feature Store Concepts](https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store-getting-started.html#feature-store-concepts) and [these docs](https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store-create-feature-group.html)." ] }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "customers_feature_group = FeatureGroup(\n", + " name=customers_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "orders_feature_group = FeatureGroup(\n", + " name=orders_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "products_feature_group = FeatureGroup(\n", + " name=products_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "click_stream_historical_feature_group = FeatureGroup(\n", + " name=click_stream_historical_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "click_stream_feature_group = FeatureGroup(\n", + " name=click_stream_feature_group_name, sagemaker_session=sagemaker_session\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idsum_activity_weight_last_2mavg_product_health_index_last_2mevent_time
0C0923480.21.716941e+09
1D1928330.11.716941e+09
2C123490.81.716941e+09
\n", + "
" + ], + "text/plain": [ + " customer_id sum_activity_weight_last_2m avg_product_health_index_last_2m \\\n", + "0 C09234 8 0.2 \n", + "1 D19283 3 0.1 \n", + "2 C1234 9 0.8 \n", + "\n", + " event_time \n", + "0 1.716941e+09 \n", + "1 1.716941e+09 \n", + "2 1.716941e+09 " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Event Time\n", + "event_time_feature_name = \"event_time\"\n", + "current_time_sec = int(round(time.time()))\n", + "\n", + "df_customers[event_time_feature_name] = pd.Series([current_time_sec]*len(df_customers), dtype=\"float64\")\n", + "df_orders[event_time_feature_name] = pd.Series([current_time_sec]*len(df_orders), dtype=\"float64\")\n", + "df_products[event_time_feature_name] = pd.Series([current_time_sec]*len(df_products), dtype=\"float64\")\n", + "df_click_stream_historical[event_time_feature_name] = pd.Series([current_time_sec]*len(df_click_stream_historical), dtype=\"float64\")\n", + "df_click_stream[event_time_feature_name] = pd.Series([current_time_sec]*len(df_click_stream), dtype=\"float64\")\n", + "\n", + "df_click_stream.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[FeatureDefinition(feature_name='customer_id', feature_type=, collection_type=None),\n", + " FeatureDefinition(feature_name='sum_activity_weight_last_2m', feature_type=, collection_type=None),\n", + " FeatureDefinition(feature_name='avg_product_health_index_last_2m', feature_type=, collection_type=None),\n", + " FeatureDefinition(feature_name='event_time', feature_type=, collection_type=None)]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load Feature Definitions\n", + "customers_feature_group.load_feature_definitions(data_frame=df_customers)\n", + "orders_feature_group.load_feature_definitions(data_frame=df_orders)\n", + "products_feature_group.load_feature_definitions(data_frame=df_products)\n", + "click_stream_historical_feature_group.load_feature_definitions(data_frame=df_click_stream_historical)\n", + "click_stream_feature_group.load_feature_definitions(data_frame=df_click_stream)" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "customers_feature_group = create_feature_group(df_customers, customers_feature_group_name,\n", - " 'customer_id', prefix, sagemaker_session)\n", - "products_feature_group = create_feature_group(df_products, products_feature_group_name, 'product_id',\n", - " prefix, sagemaker_session)\n", - "orders_feature_group = create_feature_group(df_orders, orders_feature_group_name, 'order_id', prefix,\n", - " sagemaker_session)\n", - "click_stream_historical_feature_group = create_feature_group(df_click_stream_historical,\n", - " click_stream_historical_feature_group_name,\n", - " 'click_stream_id', prefix, sagemaker_session)\n", - "click_stream_feature_group = create_feature_group(df_click_stream, click_stream_feature_group_name, 'customer_id',\n", - " prefix, sagemaker_session)" + "# Create Feature Groups\n", + "record_identifier_feature_name = \"customer_id\"\n", + "customers_feature_group.create(\n", + " s3_uri=f\"s3://{default_bucket}/{prefix}\",\n", + " record_identifier_name=record_identifier_feature_name,\n", + " event_time_feature_name=event_time_feature_name,\n", + " role_arn=role,\n", + " enable_online_store=True\n", + ")\n", + "\n", + "orders_feature_group.create(\n", + " s3_uri=f\"s3://{default_bucket}/{prefix}\",\n", + " record_identifier_name=record_identifier_feature_name,\n", + " event_time_feature_name=event_time_feature_name,\n", + " role_arn=role,\n", + " enable_online_store=True\n", + ")\n", + "\n", + "click_stream_historical_feature_group.create(\n", + " s3_uri=f\"s3://{default_bucket}/{prefix}\",\n", + " record_identifier_name=record_identifier_feature_name,\n", + " event_time_feature_name=event_time_feature_name,\n", + " role_arn=role,\n", + " enable_online_store=True\n", + ")\n", + "\n", + "click_stream_feature_group.create(\n", + " s3_uri=f\"s3://{default_bucket}/{prefix}\",\n", + " record_identifier_name=record_identifier_feature_name,\n", + " event_time_feature_name=event_time_feature_name,\n", + " role_arn=role,\n", + " enable_online_store=True\n", + ")\n", + "\n", + "products_record_identifier_feature_name = \"product_id\"\n", + "\n", + "products_feature_group.create(\n", + " s3_uri=f\"s3://{default_bucket}/{prefix}\",\n", + " record_identifier_name=products_record_identifier_feature_name,\n", + " event_time_feature_name=event_time_feature_name,\n", + " role_arn=role,\n", + " enable_online_store=True\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for Feature Group to be Created\n", + "Waiting for Feature Group to be Created\n", + "FeatureGroup recsys-customers-fg-05-29-00-10 successfully created.\n", + "Waiting for Feature Group to be Created\n", + "FeatureGroup recsys-orders-fg-05-29-00-10 successfully created.\n", + "Waiting for Feature Group to be Created\n", + "FeatureGroup recsys-products-fg-05-29-00-10 successfully created.\n", + "FeatureGroup recsys-click-stream-historical-fg-05-29-00-10 successfully created.\n", + "FeatureGroup recsys-click-stream-fg-05-29-00-10 successfully created.\n" + ] + } + ], + "source": [ + "# Check Feature Groups have finished creating\n", + "def check_feature_group_status(feature_group):\n", + " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", + " while status == \"Creating\":\n", + " print(\"Waiting for Feature Group to be Created\")\n", + " time.sleep(5)\n", + " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", + " print(f\"FeatureGroup {feature_group.name} successfully created.\")\n", + "\n", + "\n", + "check_feature_group_status(customers_feature_group)\n", + "check_feature_group_status(orders_feature_group)\n", + "check_feature_group_status(products_feature_group)\n", + "check_feature_group_status(click_stream_historical_feature_group)\n", + "check_feature_group_status(click_stream_feature_group)" ] }, { @@ -277,22 +951,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 39, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "customers_table = get_feature_group_table_name(customers_feature_group)\n", - "products_table = get_feature_group_table_name(products_feature_group)\n", - "orders_table = get_feature_group_table_name(orders_feature_group)\n", - "click_stream_historical_table = get_feature_group_table_name(click_stream_historical_feature_group)\n", - "click_stream_table = get_feature_group_table_name(click_stream_feature_group)\n", + "# Store table names \n", + "customers_query = customers_feature_group.athena_query()\n", + "customers_table = customers_query.table_name\n", + "\n", + "products_query = products_feature_group.athena_query()\n", + "products_table = products_query.table_name\n", + "\n", + "orders_query = orders_feature_group.athena_query()\n", + "orders_table = orders_query.table_name\n", + "\n", + "click_stream_historical_query = click_stream_historical_feature_group.athena_query()\n", + "click_stream_historical_table = click_stream_historical_query.table_name\n", + "\n", + "click_stream_query = click_stream_feature_group.athena_query()\n", + "click_stream_table = click_stream_query.table_name\n", "\n", "# Store table names locally to be used in other notebooks\n", - "ps.add({'customers_table': customers_table,\n", - " 'products_table': products_table,\n", - " 'orders_table': orders_table,\n", - " 'click_stream_historical_table': click_stream_historical_table,\n", - " 'click_stream_table': click_stream_table})" + "ps.add({\n", + " 'customers_table': customers_table,\n", + " 'products_table': products_table,\n", + " 'orders_table': orders_table,\n", + " 'click_stream_historical_table': click_stream_historical_table,\n", + " 'click_stream_table': click_stream_table\n", + "})" ] }, { @@ -311,20 +999,44 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "IngestionManagerPandas(feature_group_name='recsys-click-stream-historical-fg-05-29-00-10', feature_definitions={'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'product_id': {'FeatureName': 'product_id', 'FeatureType': 'String'}, 'bought': {'FeatureName': 'bought', 'FeatureType': 'Integral'}, 'healthy_activity_last_2m': {'FeatureName': 'healthy_activity_last_2m', 'FeatureType': 'Integral'}, 'rating': {'FeatureName': 'rating', 'FeatureType': 'Fractional'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=, sagemaker_session=, max_workers=3, max_processes=1, profile_name=None, _async_result=, _processing_pool=, _failed_indices=[])" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ingest_data_into_feature_group(df_customers, customers_feature_group)\n", + "# Ingest data into the feature groups\n", + "customers_feature_group.ingest(data_frame=df_customers, max_workers=3, wait=False)\n", "customers_count = df_customers.shape[0]\n", "\n", - "ingest_data_into_feature_group(df_products, products_feature_group)\n", + "products_feature_group.ingest(data_frame=df_products, max_workers=3, wait=False)\n", "products_count = df_products.shape[0]\n", "\n", - "ingest_data_into_feature_group(df_orders, orders_feature_group)\n", + "orders_feature_group.ingest(data_frame=df_orders, max_workers=3, wait=False)\n", "orders_count = df_orders.shape[0]\n", "\n", - "ingest_data_into_feature_group(df_click_stream_historical, click_stream_historical_feature_group)\n", + "click_stream_historical_feature_group.ingest(data_frame=df_click_stream_historical, max_workers=3, wait=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ "click_stream_historical_count = df_click_stream_historical.shape[0]\n", "\n", "# Add Feature Group counts for later use\n", @@ -351,18 +1063,6 @@ "" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "wait_for_offline_data(customers_feature_group_name, df_customers, sagemaker_session)\n", - "wait_for_offline_data(products_feature_group_name, df_products, sagemaker_session)\n", - "wait_for_offline_data(orders_feature_group_name, df_orders, sagemaker_session)\n", - "wait_for_offline_data(click_stream_historical_feature_group_name, df_click_stream_historical, sagemaker_session)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -372,9 +1072,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 43, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "Offline Feature Store S3 Link" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "account_id = default_bucket.split('-')[-1]\n", "offline_store_url = f'https://s3.console.aws.amazon.com/s3/buckets/{default_bucket}?region={region}&prefix={prefix}/{account_id}/sagemaker/{region}/offline-store/&showversions=false'\n", @@ -383,9 +1098,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 44, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 29/05/2024 00:34:44\n" + ] + } + ], "source": [ "# Save all our local params\n", "ps.store()" @@ -400,14 +1125,619 @@ } ], "metadata": { - "instance_type": "ml.m5.4xlarge", + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + "vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], + "instance_type": "ml.t3.medium", "interpreter": { "hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322" }, "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3 (Data Science 3.0)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1" }, "language_info": { "codemirror_mode": { @@ -419,7 +1749,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/2_recommendation_engine_models.ipynb b/2_recommendation_engine_models.ipynb index 15823a9..7f416ec 100644 --- a/2_recommendation_engine_models.ipynb +++ b/2_recommendation_engine_models.ipynb @@ -37,8 +37,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import sagemaker\n", @@ -72,8 +74,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "role = sagemaker.get_execution_role()\n", @@ -89,8 +93,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# CF model variables\n", @@ -122,8 +128,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "parameters = ps.read()\n", @@ -134,11 +142,58 @@ "click_stream_historical_feature_group_name = parameters['click_stream_historical_feature_group_name']\n", "click_stream_feature_group_name = parameters['click_stream_feature_group_name']\n", "\n", - "customers_table = parameters['customers_table']\n", - "products_table = parameters['products_table']\n", - "orders_table = parameters['orders_table']\n", - "click_stream_historical_table = parameters['click_stream_historical_table']\n", - "click_stream_table = parameters['click_stream_table']" + "customers_feature_group = FeatureGroup(\n", + " name=customers_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "products_feature_group = FeatureGroup(\n", + " name=products_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "orders_feature_group = FeatureGroup(\n", + " name=orders_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "click_stream_historical_feature_group = FeatureGroup(\n", + " name=click_stream_historical_feature_group_name, sagemaker_session=sagemaker_session\n", + ")\n", + "click_stream_feature_group = FeatureGroup(\n", + " name=click_stream_feature_group_name, sagemaker_session=sagemaker_session\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "FeatureGroup recsys-customers-fg-05-29-00-10 successfully created.\n", + "FeatureGroup recsys-products-fg-05-29-00-10 successfully created.\n", + "FeatureGroup recsys-orders-fg-05-29-00-10 successfully created.\n", + "FeatureGroup recsys-click-stream-historical-fg-05-29-00-10 successfully created.\n", + "FeatureGroup recsys-click-stream-fg-05-29-00-10 successfully created.\n" + ] + } + ], + "source": [ + "# df_click_stream_historical_data.head()\n", + "def check_feature_group_status(feature_group):\n", + " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", + " while status == \"Creating\":\n", + " print(\"Waiting for Feature Group to be Created\")\n", + " time.sleep(5)\n", + " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", + " print(f\"FeatureGroup {feature_group.name} successfully created.\")\n", + " \n", + " \n", + "check_feature_group_status(customers_feature_group)\n", + "check_feature_group_status(products_feature_group)\n", + "check_feature_group_status(orders_feature_group)\n", + "check_feature_group_status(click_stream_historical_feature_group)\n", + "check_feature_group_status(click_stream_feature_group)" ] }, { @@ -159,10 +214,128 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idproduct_idratingstateageis_marriedproduct_name
0C2786P12061.199858new mexico750creamy caramel filled hard candies
1C866P12061.424285delaware351creamy caramel filled hard candies
2C2786P149842.513348new mexico7501 apple + 1 mango fruit bar
3C4053P9922.855658louisiana241outshine simply yogurt bars strawberry
4C4913P143412.773447north carolina280vegetarian chili seasoning
\n", + "
" + ], + "text/plain": [ + " customer_id product_id rating state age is_married \\\n", + "0 C2786 P1206 1.199858 new mexico 75 0 \n", + "1 C866 P1206 1.424285 delaware 35 1 \n", + "2 C2786 P14984 2.513348 new mexico 75 0 \n", + "3 C4053 P992 2.855658 louisiana 24 1 \n", + "4 C4913 P14341 2.773447 north carolina 28 0 \n", + "\n", + " product_name \n", + "0 creamy caramel filled hard candies \n", + "1 creamy caramel filled hard candies \n", + "2 1 apple + 1 mango fruit bar \n", + "3 outshine simply yogurt bars strawberry \n", + "4 vegetarian chili seasoning " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "click_stream_query = click_stream_historical_feature_group.athena_query()\n", + "click_stream_historical_table = click_stream_query.table_name\n", + "\n", + "customers_query = customers_feature_group.athena_query()\n", + "customers_table = customers_query.table_name\n", + "\n", + "products_query = products_feature_group.athena_query()\n", + "products_table = products_query.table_name\n", + "\n", + "\n", "query = f'''\n", "select click_stream_customers.customer_id,\n", " products.product_id,\n", @@ -189,9 +362,11 @@ "where click_stream_customers.bought = 1\n", "'''\n", "\n", - "df_cf_features, query = query_offline_store(click_stream_feature_group_name, query,\n", - " sagemaker_session)\n", - "df_cf_features.head()" + "df_cf_features = pd.DataFrame()\n", + "click_stream_query.run(query_string=query, output_location='s3://'+default_bucket+'/query_results/')\n", + "click_stream_query.wait()\n", + "df_cf_features = click_stream_query.as_dataframe()\n", + "df_cf_features.head(5)" ] }, { @@ -221,8 +396,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def transform_cf_data(training_df, inference_df=None):\n", @@ -277,8 +454,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "X, y = load_dataset(df_cf_features)" @@ -293,9 +472,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 14, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(65393, 28366) (16349, 28366) (65393,) (16349,)\n" + ] + } + ], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", @@ -320,9 +509,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 15, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "s3://sagemaker-ap-southeast-2-XXXXXXXXXXXX/recsys/train/train.protobuf\n", + "s3://sagemaker-ap-southeast-2-XXXXXXXXXXXX/recsys/test/test.protobuf\n", + "Output: s3://sagemaker-ap-southeast-2-XXXXXXXXXXXX/recsys/output\n" + ] + } + ], "source": [ "def write_dataset_to_protobuf(X, y, bucket, prefix, key):\n", " \"\"\"\n", @@ -352,8 +553,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Add variables to be saved for later notebooks\n", @@ -385,8 +588,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "container = sagemaker.image_uris.retrieve(\"factorization-machines\", region=region)\n", @@ -420,17 +625,305 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 18, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker:Creating training-job with name: factorization-machines-2024-05-29-00-35-05-125\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-05-29 00:35:05 Starting - Starting the training job...\n", + "2024-05-29 00:35:21 Starting - Preparing the instances for training...\n", + "2024-05-29 00:35:58 Downloading - Downloading the training image.....................\n", + "2024-05-29 00:39:24 Training - Training image download completed. Training in progress..\u001b[34mDocker entrypoint called with argument(s): train\u001b[0m\n", + "\u001b[34mRunning default environment configuration script\u001b[0m\n", + "\u001b[34m/opt/amazon/lib/python3.8/site-packages/mxnet/model.py:97: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", + " if num_device is 1 and 'dist' not in kvstore:\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linear_init_sigma': '0.01', 'factors_init_method': 'normal', 'factors_init_sigma': '0.001', 'batch_metrics_publish_interval': '500', '_data_format': 'record', '_kvstore': 'auto', '_learning_rate': '1.0', '_log_level': 'info', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_optimizer': 'adam', '_tuning_objective_metric': '', '_use_full_symbolic': 'true', '_wd': '1.0'}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Merging with provided configuration from /opt/ml/input/config/hyperparameters.json: {'epochs': '20', 'feature_dim': '28366', 'mini_batch_size': '1000', 'num_factors': '64', 'predictor_type': 'regressor'}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Final configuration: {'epochs': '20', 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linear_init_sigma': '0.01', 'factors_init_method': 'normal', 'factors_init_sigma': '0.001', 'batch_metrics_publish_interval': '500', '_data_format': 'record', '_kvstore': 'auto', '_learning_rate': '1.0', '_log_level': 'info', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_optimizer': 'adam', '_tuning_objective_metric': '', '_use_full_symbolic': 'true', '_wd': '1.0', 'feature_dim': '28366', 'num_factors': '64', 'predictor_type': 'regressor'}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 WARNING 140627447908160] Loggers have already been setup.\u001b[0m\n", + "\u001b[34mProcess 6 is a worker.\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Using default worker.\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Checkpoint loading and saving are disabled.\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:33.034] [tensorio] [warning] TensorIO is already initialized; ignoring the initialization routine.\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:33.037] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 0, \"duration\": 8, \"num_examples\": 1, \"num_bytes\": 111048}\u001b[0m\n", + "\u001b[34m/opt/amazon/python3.8/lib/python3.8/subprocess.py:848: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used\n", + " self.stdout = io.open(c2pread, 'rb', bufsize)\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] nvidia-smi: took 0.030 seconds to run.\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] nvidia-smi identified 0 GPUs.\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Number of GPUs being used: 0\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] [Sparse network] Building a sparse network.\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] Create Store: local\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943173.0295057, \"EndTime\": 1716943173.070657, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"initialize.time\": {\"sum\": 33.90049934387207, \"count\": 1, \"min\": 33.90049934387207, \"max\": 33.90049934387207}}}\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943173.0707624, \"EndTime\": 1716943173.070795, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"Meta\": \"init_train_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1000.0, \"count\": 1, \"min\": 1000, \"max\": 1000}, \"Total Batches Seen\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Max Records Seen Between Resets\": {\"sum\": 1000.0, \"count\": 1, \"min\": 1000, \"max\": 1000}, \"Max Batches Seen Between Resets\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Reset Count\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Number of Records Since Last Reset\": {\"sum\": 0.0, \"count\": 1, \"min\": 0, \"max\": 0}, \"Number of Batches Since Last Reset\": {\"sum\": 0.0, \"count\": 1, \"min\": 0, \"max\": 0}}}\u001b[0m\n", + "\u001b[34m[00:39:33] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.3.x_Cuda_11.1.x.404.0/AL2_x86_64/generic-flavor/src/src/kvstore/./kvstore_local.h:306: Warning: non-default weights detected during kvstore pull. This call has been ignored. Please make sure to use kv.row_sparse_pull() or module.prepare() with row_ids.\u001b[0m\n", + "\u001b[34m[00:39:33] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.3.x_Cuda_11.1.x.404.0/AL2_x86_64/generic-flavor/src/src/kvstore/./kvstore_local.h:306: Warning: non-default weights detected during kvstore pull. This call has been ignored. Please make sure to use kv.row_sparse_pull() or module.prepare() with row_ids.\u001b[0m\n", + "\u001b[34m[00:39:33] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.3.x_Cuda_11.1.x.404.0/AL2_x86_64/generic-flavor/src/src/operator/././../common/utils.h:450: Optimizer with lazy_update = True detected. Be aware that lazy update with row_sparse gradient is different from standard update, and may lead to different empirical results. See https://mxnet.incubator.apache.org/api/python/optimization/optimization.html for more details.\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, batch=0 train rmse =2.3243699793236985\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, batch=0 train mse =5.40269580078125\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, batch=0 train absolute_loss =2.219640380859375\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:33.930] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 2, \"duration\": 769, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, train rmse =1.04838653056041\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, train mse =1.0991143174604936\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=0, train absolute_loss =0.8338966582327179\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943173.0707183, \"EndTime\": 1716943173.931281, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"epochs\": {\"sum\": 20.0, \"count\": 1, \"min\": 20, \"max\": 20}, \"update.time\": {\"sum\": 860.2795600891113, \"count\": 1, \"min\": 860.2795600891113, \"max\": 860.2795600891113}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #progress_metric: host=algo-1, completed 5.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943173.0709755, \"EndTime\": 1716943173.9314995, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 0, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 66393.0, \"count\": 1, \"min\": 66393, \"max\": 66393}, \"Total Batches Seen\": {\"sum\": 67.0, \"count\": 1, \"min\": 67, \"max\": 67}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 2.0, \"count\": 1, \"min\": 2, \"max\": 2}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=75985.0648132552 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, batch=0 train rmse =0.699270533173287\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, batch=0 train mse =0.4889792785644531\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:33 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, batch=0 train absolute_loss =0.5935192260742187\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:34.948] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 4, \"duration\": 1015, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, train rmse =0.7150839339986994\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, train mse =0.5113450326630563\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=1, train absolute_loss =0.6103684655391809\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943173.9313607, \"EndTime\": 1716943174.9494588, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 1017.7371501922607, \"count\": 1, \"min\": 1017.7371501922607, \"max\": 1017.7371501922607}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #progress_metric: host=algo-1, completed 10.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943173.9316964, \"EndTime\": 1716943174.949821, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 1, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 131786.0, \"count\": 1, \"min\": 131786, \"max\": 131786}, \"Total Batches Seen\": {\"sum\": 133.0, \"count\": 1, \"min\": 133, \"max\": 133}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 3.0, \"count\": 1, \"min\": 3, \"max\": 3}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=64222.65226103779 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, batch=0 train rmse =0.6905555103261409\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, batch=0 train mse =0.4768669128417969\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:34 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, batch=0 train absolute_loss =0.586952880859375\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:35.908] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 6, \"duration\": 952, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, train rmse =0.7090962377316878\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, train mse =0.5028174743652344\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=2, train absolute_loss =0.6054784185236151\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943174.949536, \"EndTime\": 1716943175.9089746, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 958.7352275848389, \"count\": 1, \"min\": 958.7352275848389, \"max\": 958.7352275848389}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #progress_metric: host=algo-1, completed 15.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943174.950213, \"EndTime\": 1716943175.9092712, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 2, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 197179.0, \"count\": 1, \"min\": 197179, \"max\": 197179}, \"Total Batches Seen\": {\"sum\": 199.0, \"count\": 1, \"min\": 199, \"max\": 199}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 4.0, \"count\": 1, \"min\": 4, \"max\": 4}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=68177.25548425746 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, batch=0 train rmse =0.6858606609154707\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, batch=0 train mse =0.47040484619140627\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:35 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, batch=0 train absolute_loss =0.5828222045898438\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:36.728] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 8, \"duration\": 817, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, train rmse =0.7023911057760747\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, train mse =0.4933532654733369\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=3, train absolute_loss =0.5996325369170218\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943175.9090288, \"EndTime\": 1716943176.729298, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 819.7922706604004, \"count\": 1, \"min\": 819.7922706604004, \"max\": 819.7922706604004}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #progress_metric: host=algo-1, completed 20.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943175.9094827, \"EndTime\": 1716943176.7295601, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 3, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 262572.0, \"count\": 1, \"min\": 262572, \"max\": 262572}, \"Total Batches Seen\": {\"sum\": 265.0, \"count\": 1, \"min\": 265, \"max\": 265}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 5.0, \"count\": 1, \"min\": 5, \"max\": 5}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=79730.76046829497 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, batch=0 train rmse =0.679621180313319\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, batch=0 train mse =0.46188494873046876\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:36 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, batch=0 train absolute_loss =0.5771847534179687\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:37.503] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 10, \"duration\": 771, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, train rmse =0.6938342617224935\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, train mse =0.48140598273999763\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=4, train absolute_loss =0.5921244959975734\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943176.7293534, \"EndTime\": 1716943177.5039217, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 774.1501331329346, \"count\": 1, \"min\": 774.1501331329346, \"max\": 774.1501331329346}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #progress_metric: host=algo-1, completed 25.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943176.729749, \"EndTime\": 1716943177.504056, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 4, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 327965.0, \"count\": 1, \"min\": 327965, \"max\": 327965}, \"Total Batches Seen\": {\"sum\": 331.0, \"count\": 1, \"min\": 331, \"max\": 331}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 6.0, \"count\": 1, \"min\": 6, \"max\": 6}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=84444.792460138 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, batch=0 train rmse =0.6723749729489096\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, batch=0 train mse =0.45208810424804685\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:37 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, batch=0 train absolute_loss =0.5705239868164063\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:38.292] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 12, \"duration\": 783, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, train rmse =0.6841473968757971\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, train mse =0.46805766065192944\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=5, train absolute_loss =0.5835006454930161\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943177.5039735, \"EndTime\": 1716943178.293152, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 788.9235019683838, \"count\": 1, \"min\": 788.9235019683838, \"max\": 788.9235019683838}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #progress_metric: host=algo-1, completed 30.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943177.5042048, \"EndTime\": 1716943178.293383, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 5, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 393358.0, \"count\": 1, \"min\": 393358, \"max\": 393358}, \"Total Batches Seen\": {\"sum\": 397.0, \"count\": 1, \"min\": 397, \"max\": 397}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 7.0, \"count\": 1, \"min\": 7, \"max\": 7}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=82852.19188322074 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, batch=0 train rmse =0.664622994159271\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, batch=0 train mse =0.4417237243652344\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:38 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, batch=0 train absolute_loss =0.5635284423828125\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:39.068] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 14, \"duration\": 770, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, train rmse =0.6739215477205758\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, train mse =0.45417025248209636\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=6, train absolute_loss =0.5742900714296283\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943178.2932088, \"EndTime\": 1716943179.069036, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 775.421142578125, \"count\": 1, \"min\": 775.421142578125, \"max\": 775.421142578125}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #progress_metric: host=algo-1, completed 35.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943178.2935927, \"EndTime\": 1716943179.069173, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 6, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 458751.0, \"count\": 1, \"min\": 458751, \"max\": 458751}, \"Total Batches Seen\": {\"sum\": 463.0, \"count\": 1, \"min\": 463, \"max\": 463}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 8.0, \"count\": 1, \"min\": 8, \"max\": 8}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=84306.73052890941 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, batch=0 train rmse =0.6566801510635001\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, batch=0 train mse =0.43122882080078123\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, batch=0 train absolute_loss =0.5564387817382812\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:39.834] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 16, \"duration\": 763, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, train rmse =0.6635769646690042\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, train mse =0.44033438803932884\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=7, train absolute_loss =0.564879950321082\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943179.0690897, \"EndTime\": 1716943179.8349438, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 765.5997276306152, \"count\": 1, \"min\": 765.5997276306152, \"max\": 765.5997276306152}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #progress_metric: host=algo-1, completed 40.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943179.0693188, \"EndTime\": 1716943179.8351235, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 7, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 524144.0, \"count\": 1, \"min\": 524144, \"max\": 524144}, \"Total Batches Seen\": {\"sum\": 529.0, \"count\": 1, \"min\": 529, \"max\": 529}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 9.0, \"count\": 1, \"min\": 9, \"max\": 9}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=85380.07795078939 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, batch=0 train rmse =0.6487400461363069\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, batch=0 train mse =0.4208636474609375\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:39 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, batch=0 train absolute_loss =0.5494663696289063\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:40.590] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 18, \"duration\": 753, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, train rmse =0.6534163943720617\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, train mse =0.4269529844341856\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=8, train absolute_loss =0.5555228363961885\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943179.835001, \"EndTime\": 1716943180.591622, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 756.2096118927002, \"count\": 1, \"min\": 756.2096118927002, \"max\": 756.2096118927002}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #progress_metric: host=algo-1, completed 45.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943179.8353837, \"EndTime\": 1716943180.591916, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 8, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 589537.0, \"count\": 1, \"min\": 589537, \"max\": 589537}, \"Total Batches Seen\": {\"sum\": 595.0, \"count\": 1, \"min\": 595, \"max\": 595}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 10.0, \"count\": 1, \"min\": 10, \"max\": 10}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=86422.05922478618 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, batch=0 train rmse =0.6409521363668906\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, batch=0 train mse =0.41081964111328123\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:40 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, batch=0 train absolute_loss =0.5427968139648438\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:41.368] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 20, \"duration\": 771, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, train rmse =0.6436583883131721\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, train mse =0.4142961208459103\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=9, train absolute_loss =0.5464532059178208\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943180.5917187, \"EndTime\": 1716943181.3686094, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 776.268482208252, \"count\": 1, \"min\": 776.268482208252, \"max\": 776.268482208252}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #progress_metric: host=algo-1, completed 50.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943180.592315, \"EndTime\": 1716943181.3688226, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 9, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 654930.0, \"count\": 1, \"min\": 654930, \"max\": 654930}, \"Total Batches Seen\": {\"sum\": 661.0, \"count\": 1, \"min\": 661, \"max\": 661}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 11.0, \"count\": 1, \"min\": 11, \"max\": 11}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=84197.7997148159 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, batch=0 train rmse =0.6334473089090067\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, batch=0 train mse =0.4012554931640625\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:41 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, batch=0 train absolute_loss =0.5361207885742187\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:42.121] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 22, \"duration\": 749, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, train rmse =0.6344510547758051\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, train mse =0.40252814090613165\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=10, train absolute_loss =0.5378079723011364\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943181.3686695, \"EndTime\": 1716943182.1223965, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 753.2670497894287, \"count\": 1, \"min\": 753.2670497894287, \"max\": 753.2670497894287}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #progress_metric: host=algo-1, completed 55.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943181.3691046, \"EndTime\": 1716943182.1225317, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 10, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 720323.0, \"count\": 1, \"min\": 720323, \"max\": 720323}, \"Total Batches Seen\": {\"sum\": 727.0, \"count\": 1, \"min\": 727, \"max\": 727}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 12.0, \"count\": 1, \"min\": 12, \"max\": 12}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=86780.1220245459 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, batch=0 train rmse =0.6263321496378115\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, batch=0 train mse =0.39229196166992186\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, batch=0 train absolute_loss =0.5296707763671875\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:42.936] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 24, \"duration\": 812, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, train rmse =0.6258821093660648\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, train mse =0.3917284148245147\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=11, train absolute_loss =0.5296877159349846\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943182.1224484, \"EndTime\": 1716943182.9367995, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 814.0368461608887, \"count\": 1, \"min\": 814.0368461608887, \"max\": 814.0368461608887}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #progress_metric: host=algo-1, completed 60.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943182.1227415, \"EndTime\": 1716943182.9370315, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 11, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 785716.0, \"count\": 1, \"min\": 785716, \"max\": 785716}, \"Total Batches Seen\": {\"sum\": 793.0, \"count\": 1, \"min\": 793, \"max\": 793}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 13.0, \"count\": 1, \"min\": 13, \"max\": 13}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=80297.43202556841 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, batch=0 train rmse =0.6196789851832434\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, batch=0 train mse =0.38400204467773436\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:42 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, batch=0 train absolute_loss =0.5234930419921875\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:43.692] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 26, \"duration\": 753, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, train rmse =0.6179902861519048\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, train mse =0.3819119937781132\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=12, train absolute_loss =0.5221529467033618\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943182.93689, \"EndTime\": 1716943183.692754, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 755.4678916931152, \"count\": 1, \"min\": 755.4678916931152, \"max\": 755.4678916931152}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #progress_metric: host=algo-1, completed 65.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943182.9372606, \"EndTime\": 1716943183.6929443, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 12, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 851109.0, \"count\": 1, \"min\": 851109, \"max\": 851109}, \"Total Batches Seen\": {\"sum\": 859.0, \"count\": 1, \"min\": 859, \"max\": 859}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 14.0, \"count\": 1, \"min\": 14, \"max\": 14}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=86523.25777123167 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, batch=0 train rmse =0.613523563793664\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, batch=0 train mse =0.3764111633300781\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:43 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, batch=0 train absolute_loss =0.5177263793945313\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:44.538] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 28, \"duration\": 840, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, train rmse =0.610777409585509\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, train mse =0.3730490440599846\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=13, train absolute_loss =0.5152088220769708\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943183.6928098, \"EndTime\": 1716943184.538854, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 845.649242401123, \"count\": 1, \"min\": 845.649242401123, \"max\": 845.649242401123}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #progress_metric: host=algo-1, completed 70.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943183.6931825, \"EndTime\": 1716943184.5389938, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 13, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 916502.0, \"count\": 1, \"min\": 916502, \"max\": 916502}, \"Total Batches Seen\": {\"sum\": 925.0, \"count\": 1, \"min\": 925, \"max\": 925}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 15.0, \"count\": 1, \"min\": 15, \"max\": 15}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=77306.3238888831 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, batch=0 train rmse =0.6078711662320628\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, batch=0 train mse =0.3695073547363281\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:44 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, batch=0 train absolute_loss =0.5121652221679688\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:45.430] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 30, \"duration\": 890, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, train rmse =0.6042197229573179\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, train mse =0.3650814736106179\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=14, train absolute_loss =0.5088311596494732\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943184.5389092, \"EndTime\": 1716943185.4312375, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 892.073392868042, \"count\": 1, \"min\": 892.073392868042, \"max\": 892.073392868042}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #progress_metric: host=algo-1, completed 75.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943184.5391417, \"EndTime\": 1716943185.4313726, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 14, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 981895.0, \"count\": 1, \"min\": 981895, \"max\": 981895}, \"Total Batches Seen\": {\"sum\": 991.0, \"count\": 1, \"min\": 991, \"max\": 991}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 16.0, \"count\": 1, \"min\": 16, \"max\": 16}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=73285.19445424894 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, batch=0 train rmse =0.6027058179398268\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, batch=0 train mse =0.36325430297851563\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:45 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, batch=0 train absolute_loss =0.5068946533203125\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:46.285] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 32, \"duration\": 849, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, train rmse =0.598277434069688\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, train mse =0.35793588811700994\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=15, train absolute_loss =0.5029859531286991\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943185.4312897, \"EndTime\": 1716943186.2860541, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 854.485034942627, \"count\": 1, \"min\": 854.485034942627, \"max\": 854.485034942627}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #progress_metric: host=algo-1, completed 80.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943185.4315462, \"EndTime\": 1716943186.2862678, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 15, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1047288.0, \"count\": 1, \"min\": 1047288, \"max\": 1047288}, \"Total Batches Seen\": {\"sum\": 1057.0, \"count\": 1, \"min\": 1057, \"max\": 1057}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=76500.53174537209 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, batch=0 train rmse =0.597998982610443\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, batch=0 train mse =0.357602783203125\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:46 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, batch=0 train absolute_loss =0.5020321655273438\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:47.030] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 34, \"duration\": 742, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, train rmse =0.5929021012345286\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, train mse =0.35153290164831913\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=16, train absolute_loss =0.4976553497314453\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943186.2861469, \"EndTime\": 1716943187.031127, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 744.6818351745605, \"count\": 1, \"min\": 744.6818351745605, \"max\": 744.6818351745605}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #progress_metric: host=algo-1, completed 85.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943186.2864232, \"EndTime\": 1716943187.031272, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 16, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1112681.0, \"count\": 1, \"min\": 1112681, \"max\": 1112681}, \"Total Batches Seen\": {\"sum\": 1123.0, \"count\": 1, \"min\": 1123, \"max\": 1123}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 18.0, \"count\": 1, \"min\": 18, \"max\": 18}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=87784.25226271317 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, batch=0 train rmse =0.5937158707296362\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, batch=0 train mse =0.35249853515625\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, batch=0 train absolute_loss =0.49739056396484377\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:47.781] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 36, \"duration\": 748, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, train rmse =0.5880419338814606\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, train mse =0.3457933160030481\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=17, train absolute_loss =0.49279525710597183\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943187.031182, \"EndTime\": 1716943187.7815292, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 750.0548362731934, \"count\": 1, \"min\": 750.0548362731934, \"max\": 750.0548362731934}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #progress_metric: host=algo-1, completed 90.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943187.0314512, \"EndTime\": 1716943187.7816916, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 17, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1178074.0, \"count\": 1, \"min\": 1178074, \"max\": 1178074}, \"Total Batches Seen\": {\"sum\": 1189.0, \"count\": 1, \"min\": 1189, \"max\": 1189}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 19.0, \"count\": 1, \"min\": 19, \"max\": 19}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=87153.18146307641 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, batch=0 train rmse =0.5898197946294433\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, batch=0 train mse =0.34788739013671877\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:47 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, batch=0 train absolute_loss =0.49295233154296875\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:48.601] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 38, \"duration\": 817, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, train rmse =0.5836454162716652\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, train mse =0.3406419719349254\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=18, train absolute_loss =0.4883592219497218\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943187.7815838, \"EndTime\": 1716943188.6016605, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 819.7531700134277, \"count\": 1, \"min\": 819.7531700134277, \"max\": 819.7531700134277}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #progress_metric: host=algo-1, completed 95.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943187.7818813, \"EndTime\": 1716943188.6018543, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 18, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1243467.0, \"count\": 1, \"min\": 1243467, \"max\": 1243467}, \"Total Batches Seen\": {\"sum\": 1255.0, \"count\": 1, \"min\": 1255, \"max\": 1255}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 20.0, \"count\": 1, \"min\": 20, \"max\": 20}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=79740.61198809523 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, batch=0 train rmse =0.5862745645090143\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, batch=0 train mse =0.3437178649902344\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:48 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, batch=0 train absolute_loss =0.48873703002929686\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:49.379] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/train\", \"epoch\": 40, \"duration\": 775, \"num_examples\": 66, \"num_bytes\": 7310464}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, train rmse =0.5796635500736886\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, train mse =0.3360098312840317\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, epoch=19, train absolute_loss =0.4843095837217389\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, train rmse =0.5796635500736886\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, train mse =0.3360098312840317\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, train absolute_loss =0.4843095837217389\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943188.6017206, \"EndTime\": 1716943189.3805523, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"update.time\": {\"sum\": 778.4221172332764, \"count\": 1, \"min\": 778.4221172332764, \"max\": 778.4221172332764}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #progress_metric: host=algo-1, completed 100.0 % of epochs\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943188.6021047, \"EndTime\": 1716943189.3807492, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"epoch\": 19, \"Meta\": \"training_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 1308860.0, \"count\": 1, \"min\": 1308860, \"max\": 1308860}, \"Total Batches Seen\": {\"sum\": 1321.0, \"count\": 1, \"min\": 1321, \"max\": 1321}, \"Max Records Seen Between Resets\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Max Batches Seen Between Resets\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}, \"Reset Count\": {\"sum\": 21.0, \"count\": 1, \"min\": 21, \"max\": 21}, \"Number of Records Since Last Reset\": {\"sum\": 65393.0, \"count\": 1, \"min\": 65393, \"max\": 65393}, \"Number of Batches Since Last Reset\": {\"sum\": 66.0, \"count\": 1, \"min\": 66, \"max\": 66}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #throughput_metric: host=algo-1, train throughput=83971.73006032793 records/second\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 WARNING 140627447908160] wait_for_all_workers will not sync workers since the kv store is not running distributed\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] Pulling entire model from kvstore to finalize\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943189.380624, \"EndTime\": 1716943189.386687, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"finalize.time\": {\"sum\": 5.669116973876953, \"count\": 1, \"min\": 5.669116973876953, \"max\": 5.669116973876953}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] Saved checkpoint to \"/tmp/tmpvigxala1/state-0001.params\"\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:49.419] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/test\", \"epoch\": 0, \"duration\": 16385, \"num_examples\": 1, \"num_bytes\": 111960}\u001b[0m\n", + "\u001b[34m[2024-05-29 00:39:49.532] [tensorio] [info] epoch_stats={\"data_pipeline\": \"/opt/ml/input/data/test\", \"epoch\": 1, \"duration\": 112, \"num_examples\": 17, \"num_bytes\": 1828592}\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943189.4191773, \"EndTime\": 1716943189.532194, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\", \"Meta\": \"test_data_iter\"}, \"Metrics\": {\"Total Records Seen\": {\"sum\": 16349.0, \"count\": 1, \"min\": 16349, \"max\": 16349}, \"Total Batches Seen\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}, \"Max Records Seen Between Resets\": {\"sum\": 16349.0, \"count\": 1, \"min\": 16349, \"max\": 16349}, \"Max Batches Seen Between Resets\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}, \"Reset Count\": {\"sum\": 1.0, \"count\": 1, \"min\": 1, \"max\": 1}, \"Number of Records Since Last Reset\": {\"sum\": 16349.0, \"count\": 1, \"min\": 16349, \"max\": 16349}, \"Number of Batches Since Last Reset\": {\"sum\": 17.0, \"count\": 1, \"min\": 17, \"max\": 17}}}\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #test_score (algo-1) : ('rmse', 0.7654671821101969)\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #test_score (algo-1) : ('mse', 0.5859400068877254)\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #test_score (algo-1) : ('absolute_loss', 0.6434210347315538)\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, test rmse =0.7654671821101969\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, test mse =0.5859400068877254\u001b[0m\n", + "\u001b[34m[05/29/2024 00:39:49 INFO 140627447908160] #quality_metric: host=algo-1, test absolute_loss =0.6434210347315538\u001b[0m\n", + "\u001b[34m#metrics {\"StartTime\": 1716943189.3869832, \"EndTime\": 1716943189.532878, \"Dimensions\": {\"Algorithm\": \"factorization-machines\", \"Host\": \"algo-1\", \"Operation\": \"training\"}, \"Metrics\": {\"setuptime\": {\"sum\": 13.551473617553711, \"count\": 1, \"min\": 13.551473617553711, \"max\": 13.551473617553711}, \"totaltime\": {\"sum\": 16519.063711166382, \"count\": 1, \"min\": 16519.063711166382, \"max\": 16519.063711166382}}}\u001b[0m\n", + "\n", + "2024-05-29 00:40:03 Uploading - Uploading generated training model\n", + "2024-05-29 00:40:03 Completed - Training job completed\n", + "Training seconds: 259\n", + "Billable seconds: 259\n" + ] + } + ], "source": [ "fm.fit({'train': train_data_location, 'test': test_data_location})" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 19, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "training_job_name = fm.latest_training_job.job_name" @@ -452,9 +945,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 20, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker:Creating model with name: factorization-machines-2024-05-29-00-40-17-951\n", + "INFO:sagemaker:Creating endpoint-config with name recsys-cf-model-05-29-00-34\n", + "INFO:sagemaker:Creating endpoint with name recsys-cf-model-05-29-00-34\n" + ] + } + ], "source": [ "cf_model_predictor = fm.deploy(\n", " endpoint_name = cf_model_endpoint_name,\n", @@ -468,9 +973,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 21, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'recsys-cf-model-05-29-00-34'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cf_model_predictor.endpoint_name" ] @@ -493,9 +1011,113 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 22, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker:Query 3839da01-c1b9-45c3-980a-24b1b694198d is being executed.\n", + "INFO:sagemaker:Query 3839da01-c1b9-45c3-980a-24b1b694198d successfully executed.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
boughthealthy_activity_last_2mproduct_health_indexcustomer_health_indexproduct_category
0030.90.250698vitamins_supplements
1010.90.099806energy_granola_bars
2120.30.250698packaged_cheese
3100.30.704001baking_ingredients
40100.30.250698packaged_cheese
\n", + "
" + ], + "text/plain": [ + " bought healthy_activity_last_2m product_health_index \\\n", + "0 0 3 0.9 \n", + "1 0 1 0.9 \n", + "2 1 2 0.3 \n", + "3 1 0 0.3 \n", + "4 0 10 0.3 \n", + "\n", + " customer_health_index product_category \n", + "0 0.250698 vitamins_supplements \n", + "1 0.099806 energy_granola_bars \n", + "2 0.250698 packaged_cheese \n", + "3 0.704001 baking_ingredients \n", + "4 0.250698 packaged_cheese " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "query = f'''\n", "select bought,\n", @@ -545,8 +1167,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 23, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df_rank_features = pd.concat([df_rank_features, pd.get_dummies(df_rank_features['product_category'], prefix='prod_cat')], axis=1)\n", @@ -555,9 +1179,246 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 24, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
boughthealthy_activity_last_2mproduct_health_indexcustomer_health_indexprod_cat_baby_food_formulaprod_cat_baking_ingredientsprod_cat_candy_chocolateprod_cat_chips_pretzelsprod_cat_cleaning_productsprod_cat_coffee...prod_cat_hair_careprod_cat_ice_cream_iceprod_cat_juice_nectarsprod_cat_packaged_cheeseprod_cat_refrigeratedprod_cat_soup_broth_bouillonprod_cat_spices_seasoningsprod_cat_teaprod_cat_vitamins_supplementsprod_cat_yogurt
0030.90.250698FalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
1010.90.099806FalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2120.30.250698FalseFalseFalseFalseFalseFalse...FalseFalseFalseTrueFalseFalseFalseFalseFalseFalse
3100.30.704001FalseTrueFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
40100.30.250698FalseFalseFalseFalseFalseFalse...FalseFalseFalseTrueFalseFalseFalseFalseFalseFalse
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " bought healthy_activity_last_2m product_health_index \\\n", + "0 0 3 0.9 \n", + "1 0 1 0.9 \n", + "2 1 2 0.3 \n", + "3 1 0 0.3 \n", + "4 0 10 0.3 \n", + "\n", + " customer_health_index prod_cat_baby_food_formula \\\n", + "0 0.250698 False \n", + "1 0.099806 False \n", + "2 0.250698 False \n", + "3 0.704001 False \n", + "4 0.250698 False \n", + "\n", + " prod_cat_baking_ingredients prod_cat_candy_chocolate \\\n", + "0 False False \n", + "1 False False \n", + "2 False False \n", + "3 True False \n", + "4 False False \n", + "\n", + " prod_cat_chips_pretzels prod_cat_cleaning_products prod_cat_coffee ... \\\n", + "0 False False False ... \n", + "1 False False False ... \n", + "2 False False False ... \n", + "3 False False False ... \n", + "4 False False False ... \n", + "\n", + " prod_cat_hair_care prod_cat_ice_cream_ice prod_cat_juice_nectars \\\n", + "0 False False False \n", + "1 False False False \n", + "2 False False False \n", + "3 False False False \n", + "4 False False False \n", + "\n", + " prod_cat_packaged_cheese prod_cat_refrigerated \\\n", + "0 False False \n", + "1 False False \n", + "2 True False \n", + "3 False False \n", + "4 True False \n", + "\n", + " prod_cat_soup_broth_bouillon prod_cat_spices_seasonings prod_cat_tea \\\n", + "0 False False False \n", + "1 False False False \n", + "2 False False False \n", + "3 False False False \n", + "4 False False False \n", + "\n", + " prod_cat_vitamins_supplements prod_cat_yogurt \n", + "0 True False \n", + "1 False False \n", + "2 False False \n", + "3 False False \n", + "4 False False \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_rank_features.head()" ] @@ -571,9 +1432,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 25, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead.\n", + " return bound(*args, **kwds)\n" + ] + } + ], "source": [ "train_data, validation_data, _ = np.split(df_rank_features.sample(frac=1, random_state=1729), [int(0.7 * len(df_rank_features)), int(0.9 * len(df_rank_features))])\n", "train_data.to_csv('train.csv', header=False, index=False)\n", @@ -589,8 +1461,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 26, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "boto3.Session().resource('s3').Bucket(default_bucket).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')\n", @@ -619,11 +1493,157 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { - "scrolled": true + "scrolled": true, + "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n", + "INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-05-29-00-40-27-642\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-05-29 00:40:27 Starting - Starting the training job...\n", + "2024-05-29 00:40:43 Starting - Preparing the instances for training...\n", + "2024-05-29 00:41:13 Downloading - Downloading input data...\n", + "2024-05-29 00:41:33 Downloading - Downloading the training image......\n", + "2024-05-29 00:42:34 Training - Training image download completed. Training in progress..\u001b[34m[2024-05-29 00:42:51.157 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Imported framework sagemaker_xgboost_container.training\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.\u001b[0m\n", + "\u001b[34mReturning the value itself\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] No GPUs detected (normal if no gpus installed)\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Running XGBoost Sagemaker in algorithm mode\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Single node training.\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Train matrix has 139965 rows and 23 columns\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Validation matrix has 39990 rows\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.475 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO json_config.py:91] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.476 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:201] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.476 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO profiler_config_parser.py:102] User has disabled profiler.\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.477 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:255] Saving to /opt/ml/output/tensors\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.477 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.\u001b[0m\n", + "\u001b[34m[2024-05-29:00:42:51:INFO] Debug hook created from config\u001b[0m\n", + "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 4 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[0]#011train-error:0.06857#011validation-error:0.06824\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.628 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:423] Monitoring the collections: metrics\u001b[0m\n", + "\u001b[34m[2024-05-29 00:42:51.632 ip-10-0-241-94.ap-southeast-2.compute.internal:7 INFO hook.py:486] Hook is writing from the hook with pid: 7\u001b[0m\n", + "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[1]#011train-error:0.06822#011validation-error:0.06864\u001b[0m\n", + "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[2]#011train-error:0.06740#011validation-error:0.06917\u001b[0m\n", + "\u001b[34m[00:42:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[3]#011train-error:0.06745#011validation-error:0.06917\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[4]#011train-error:0.06751#011validation-error:0.06839\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 36 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[5]#011train-error:0.06785#011validation-error:0.06799\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[6]#011train-error:0.06742#011validation-error:0.06842\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[7]#011train-error:0.06760#011validation-error:0.06774\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[8]#011train-error:0.06757#011validation-error:0.06744\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[9]#011train-error:0.06754#011validation-error:0.06757\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 32 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[10]#011train-error:0.06696#011validation-error:0.06844\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 4 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[11]#011train-error:0.06698#011validation-error:0.06869\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 22 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[12]#011train-error:0.06699#011validation-error:0.06877\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[13]#011train-error:0.06689#011validation-error:0.06832\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 34 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[14]#011train-error:0.06695#011validation-error:0.06822\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[15]#011train-error:0.06667#011validation-error:0.06832\u001b[0m\n", + "\u001b[34m[00:42:52] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[16]#011train-error:0.06672#011validation-error:0.06817\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 36 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[17]#011train-error:0.06657#011validation-error:0.06807\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 0 extra nodes, 12 pruned nodes, max_depth=0\u001b[0m\n", + "\u001b[34m[18]#011train-error:0.06657#011validation-error:0.06807\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[19]#011train-error:0.06667#011validation-error:0.06819\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[20]#011train-error:0.06650#011validation-error:0.06834\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[21]#011train-error:0.06630#011validation-error:0.06879\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[22]#011train-error:0.06617#011validation-error:0.06882\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[23]#011train-error:0.06613#011validation-error:0.06862\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[24]#011train-error:0.06610#011validation-error:0.06864\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 38 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[25]#011train-error:0.06610#011validation-error:0.06929\u001b[0m\n", + "\u001b[34m[00:42:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[26]#011train-error:0.06607#011validation-error:0.06944\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[27]#011train-error:0.06610#011validation-error:0.06922\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[28]#011train-error:0.06607#011validation-error:0.06939\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[29]#011train-error:0.06607#011validation-error:0.06949\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[30]#011train-error:0.06622#011validation-error:0.06989\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 20 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[31]#011train-error:0.06612#011validation-error:0.07002\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 34 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[32]#011train-error:0.06605#011validation-error:0.06967\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 32 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[33]#011train-error:0.06599#011validation-error:0.06952\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 0 extra nodes, 18 pruned nodes, max_depth=0\u001b[0m\n", + "\u001b[34m[34]#011train-error:0.06597#011validation-error:0.06947\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[35]#011train-error:0.06589#011validation-error:0.06967\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[36]#011train-error:0.06611#011validation-error:0.06929\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[37]#011train-error:0.06593#011validation-error:0.06982\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[38]#011train-error:0.06589#011validation-error:0.06967\u001b[0m\n", + "\u001b[34m[00:42:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 18 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[39]#011train-error:0.06594#011validation-error:0.06969\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 30 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[40]#011train-error:0.06577#011validation-error:0.06984\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 0 extra nodes, 22 pruned nodes, max_depth=0\u001b[0m\n", + "\u001b[34m[41]#011train-error:0.06582#011validation-error:0.06982\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[42]#011train-error:0.06562#011validation-error:0.06999\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 4 extra nodes, 22 pruned nodes, max_depth=2\u001b[0m\n", + "\u001b[34m[43]#011train-error:0.06562#011validation-error:0.06999\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[44]#011train-error:0.06554#011validation-error:0.06987\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[45]#011train-error:0.06560#011validation-error:0.06947\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 24 pruned nodes, max_depth=4\u001b[0m\n", + "\u001b[34m[46]#011train-error:0.06555#011validation-error:0.06957\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[47]#011train-error:0.06557#011validation-error:0.07034\u001b[0m\n", + "\u001b[34m[00:42:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[48]#011train-error:0.06561#011validation-error:0.07037\u001b[0m\n", + "\u001b[34m[00:42:56] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", + "\u001b[34m[49]#011train-error:0.06568#011validation-error:0.07027\u001b[0m\n", + "\n", + "2024-05-29 00:43:18 Uploading - Uploading generated training model\n", + "2024-05-29 00:43:18 Completed - Training job completed\n", + "Training seconds: 124\n", + "Billable seconds: 124\n" + ] + } + ], "source": [ "container = sagemaker.image_uris.retrieve('xgboost', region, version='1.2-2')\n", "\n", @@ -664,9 +1684,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 28, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker:Creating model with name: sagemaker-xgboost-2024-05-29-00-43-39-427\n", + "INFO:sagemaker:Creating endpoint-config with name recsys-rerank-model-05-29-00-34\n", + "INFO:sagemaker:Creating endpoint with name recsys-rerank-model-05-29-00-34\n" + ] + } + ], "source": [ "xgb_predictor = xgb.deploy(\n", " endpoint_name = ranking_model_endpoint_name,\n", @@ -679,9 +1711,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 29, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'recsys-rerank-model-05-29-00-34'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "xgb_predictor.endpoint_name" ] @@ -695,8 +1740,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def top_rated_products_by_customer_state(customer_id, top_n):\n", @@ -735,9 +1782,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 32, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Stored 'cf_inference_payload' (ndarray)\n", + "Stored 'cf_inference_df' (DataFrame)\n", + "date and time: 29/05/2024 00:44:47\n" + ] + } + ], "source": [ "customer_id = 'C3571'\n", "cf_inference_df = top_rated_products_by_customer_state(customer_id, 15)\n", @@ -760,14 +1819,619 @@ } ], "metadata": { - "instance_type": "ml.t3.medium", + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + "vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], + "instance_type": "ml.m5.large", "interpreter": { "hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322" }, "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3 (Data Science 3.0)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1" }, "language_info": { "codemirror_mode": { @@ -779,7 +2443,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/3_click_stream_kinesis.ipynb b/3_click_stream_kinesis.ipynb index 216671b..a3d1f99 100644 --- a/3_click_stream_kinesis.ipynb +++ b/3_click_stream_kinesis.ipynb @@ -33,8 +33,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -61,8 +63,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "role = sagemaker.get_execution_role()\n", @@ -85,8 +89,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", @@ -105,8 +111,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "parameters = ps.read()\n", @@ -118,9 +126,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 29/05/2024 00:44:52\n" + ] + } + ], "source": [ "ps.add({'kinesis_stream_name': kinesis_stream_name,\n", " 'kinesis_analytics_application_name': kinesis_analytics_application_name})\n", @@ -143,9 +161,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for the Kinesis stream to become active...\n", + "ACTIVE\n", + "Amazon kinesis stream arn: arn:aws:kinesis:ap-southeast-2:XXXXXXXXXXXX:stream/fs-click-stream-activity-05-29-00-44-52\n" + ] + } + ], "source": [ "kinesis_client = boto3.client('kinesis')\n", "kinesis_client.create_stream(StreamName=kinesis_stream_name, ShardCount=1)\n", @@ -177,8 +207,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "kda_client = boto3.client('kinesisanalytics')" @@ -186,8 +218,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "sql_code = '''\n", @@ -217,8 +251,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "kda_input_schema = [{\n", @@ -270,18 +306,113 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 15, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mjson\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mbase64\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36msubprocess\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mos\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36msys\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mdatetime\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m datetime\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mtime\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mboto3\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mboto3 version: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mboto3.__version__\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mtry\u001b[39;49;00m:\u001b[37m\u001b[39;49;00m\n", + " sm = boto3.Session().client(service_name=\u001b[33m\"\u001b[39;49;00m\u001b[33msagemaker\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n", + " sm_fs = boto3.Session().client(service_name=\u001b[33m\"\u001b[39;49;00m\u001b[33msagemaker-featurestore-runtime\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mexcept\u001b[39;49;00m:\u001b[37m\u001b[39;49;00m\n", + " \u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mFailed while connecting to SageMaker Feature Store\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n", + " \u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mUnexpected error: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00msys.exc_info()[\u001b[34m0\u001b[39;49;00m]\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m# Read Environment Vars\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "CUSTOMER_ACTIVITY_FEATURE_GROUP = os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mclick_stream_feature_group_name\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mdef\u001b[39;49;00m \u001b[32mingest_record\u001b[39;49;00m(\u001b[37m\u001b[39;49;00m\n", + " fg_name, customer_id, sum_activity_weight_last_2m, avg_product_health_index_last_2m\u001b[37m\u001b[39;49;00m\n", + "):\u001b[37m\u001b[39;49;00m\n", + " record = [\u001b[37m\u001b[39;49;00m\n", + " {\u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mcustomer_id\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(customer_id)},\u001b[37m\u001b[39;49;00m\n", + " {\u001b[37m\u001b[39;49;00m\n", + " \u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33msum_activity_weight_last_2m\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\u001b[37m\u001b[39;49;00m\n", + " \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(sum_activity_weight_last_2m),\u001b[37m\u001b[39;49;00m\n", + " },\u001b[37m\u001b[39;49;00m\n", + " {\u001b[37m\u001b[39;49;00m\n", + " \u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mavg_product_health_index_last_2m\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\u001b[37m\u001b[39;49;00m\n", + " \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(avg_product_health_index_last_2m),\u001b[37m\u001b[39;49;00m\n", + " },\u001b[37m\u001b[39;49;00m\n", + " {\u001b[37m\u001b[39;49;00m\n", + " \u001b[33m\"\u001b[39;49;00m\u001b[33mFeatureName\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mevent_time\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\u001b[37m\u001b[39;49;00m\n", + " \u001b[33m\"\u001b[39;49;00m\u001b[33mValueAsString\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[36mstr\u001b[39;49;00m(\u001b[36mint\u001b[39;49;00m(\u001b[36mround\u001b[39;49;00m(time.time()))),\u001b[37m\u001b[39;49;00m\n", + " },\u001b[37m\u001b[39;49;00m\n", + " ]\u001b[37m\u001b[39;49;00m\n", + " sm_fs.put_record(FeatureGroupName=fg_name, Record=record)\u001b[37m\u001b[39;49;00m\n", + " \u001b[34mreturn\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + "\u001b[34mdef\u001b[39;49;00m \u001b[32mlambda_handler\u001b[39;49;00m(event, context):\u001b[37m\u001b[39;49;00m\n", + " inv_id = event[\u001b[33m\"\u001b[39;49;00m\u001b[33minvocationId\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " app_arn = event[\u001b[33m\"\u001b[39;49;00m\u001b[33mapplicationArn\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " records = event[\u001b[33m\"\u001b[39;49;00m\u001b[33mrecords\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " \u001b[36mprint\u001b[39;49;00m(\u001b[37m\u001b[39;49;00m\n", + " \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mReceived \u001b[39;49;00m\u001b[33m{\u001b[39;49;00m\u001b[36mlen\u001b[39;49;00m(records)\u001b[33m}\u001b[39;49;00m\u001b[33m records, invocation id: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00minv_id\u001b[33m}\u001b[39;49;00m\u001b[33m, app arn: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mapp_arn\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + " )\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + " ret_records = []\u001b[37m\u001b[39;49;00m\n", + " \u001b[34mfor\u001b[39;49;00m rec \u001b[35min\u001b[39;49;00m records:\u001b[37m\u001b[39;49;00m\n", + " data = rec[\u001b[33m\"\u001b[39;49;00m\u001b[33mdata\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " agg_data_str = base64.b64decode(data)\u001b[37m\u001b[39;49;00m\n", + " agg_data = json.loads(agg_data_str)\u001b[37m\u001b[39;49;00m\n", + " \u001b[36mprint\u001b[39;49;00m(agg_data)\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + " customer_id = agg_data[\u001b[33m\"\u001b[39;49;00m\u001b[33mCUSTOMER_ID\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " sum_activity_weight_last_2m = agg_data[\u001b[33m\"\u001b[39;49;00m\u001b[33mSUM_ACTIVITY_WEIGHT_LAST_2M\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " avg_product_health_index_last_2m = agg_data[\u001b[33m\"\u001b[39;49;00m\u001b[33mAVG_PRODUCT_HEALTH_INDEX_LAST_2M\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m]\u001b[37m\u001b[39;49;00m\n", + " \u001b[36mprint\u001b[39;49;00m(\u001b[37m\u001b[39;49;00m\n", + " \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mUpdating agg features for customerId: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mcustomer_id\u001b[33m}\u001b[39;49;00m\u001b[33m, Sum of activity weight last 2m: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00msum_activity_weight_last_2m\u001b[33m}\u001b[39;49;00m\u001b[33m, Average product health index last 2m: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mavg_product_health_index_last_2m\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + " )\u001b[37m\u001b[39;49;00m\n", + " ingest_record(\u001b[37m\u001b[39;49;00m\n", + " CUSTOMER_ACTIVITY_FEATURE_GROUP,\u001b[37m\u001b[39;49;00m\n", + " customer_id,\u001b[37m\u001b[39;49;00m\n", + " sum_activity_weight_last_2m,\u001b[37m\u001b[39;49;00m\n", + " avg_product_health_index_last_2m,\u001b[37m\u001b[39;49;00m\n", + " )\u001b[37m\u001b[39;49;00m\n", + "\u001b[37m\u001b[39;49;00m\n", + " \u001b[37m# Flag each record as being \"Ok\", so that Kinesis won't try to re-send\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n", + " ret_records.append({\u001b[33m\"\u001b[39;49;00m\u001b[33mrecordId\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: rec[\u001b[33m\"\u001b[39;49;00m\u001b[33mrecordId\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m], \u001b[33m\"\u001b[39;49;00m\u001b[33mresult\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: \u001b[33m\"\u001b[39;49;00m\u001b[33mOk\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m})\u001b[37m\u001b[39;49;00m\n", + " \u001b[34mreturn\u001b[39;49;00m {\u001b[33m\"\u001b[39;49;00m\u001b[33mrecords\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: ret_records}\u001b[37m\u001b[39;49;00m\n" + ] + } + ], "source": [ "!pygmentize ./scripts/lambda-stream.py" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 16, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lambda function arn: arn:aws:lambda:ap-southeast-2:XXXXXXXXXXXX:function:click-stream-aggregator-lambda05-29-00-44-52\n" + ] + } + ], "source": [ "lambda_function = Lambda(\n", " function_name=lambda_name,\n", @@ -295,7 +426,8 @@ "lambda_function_response = lambda_function.create()\n", "lambda_function_arn = lambda_function_response['FunctionArn']\n", "\n", - "print(f'Lambda function arn: {lambda_function_arn}')" + "print(f'Lambda function arn: {lambda_function_arn}')\n", + "time.sleep(5)\n" ] }, { @@ -307,9 +439,56 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 17, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResponseMetadata': {'RequestId': '5aeb83d7-25da-4b4b-8cc1-914f3c23273e',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'date': 'Wed, 29 May 2024 00:45:20 GMT',\n", + " 'content-type': 'application/json',\n", + " 'content-length': '1573',\n", + " 'connection': 'keep-alive',\n", + " 'x-amzn-requestid': '5aeb83d7-25da-4b4b-8cc1-914f3c23273e'},\n", + " 'RetryAttempts': 0},\n", + " 'FunctionName': 'click-stream-aggregator-lambda05-29-00-44-52',\n", + " 'FunctionArn': 'arn:aws:lambda:ap-southeast-2:XXXXXXXXXXXX:function:click-stream-aggregator-lambda05-29-00-44-52',\n", + " 'Runtime': 'python3.8',\n", + " 'Role': 'arn:aws:iam::XXXXXXXXXXXX:role/service-role/AmazonSageMaker-ExecutionRole-20221031T192874',\n", + " 'Handler': 'lambda-stream.lambda_handler',\n", + " 'CodeSize': 2626,\n", + " 'Description': '',\n", + " 'Timeout': 600,\n", + " 'MemorySize': 10240,\n", + " 'LastModified': '2024-05-29T00:45:20.000+0000',\n", + " 'CodeSha256': '4MK66qjsy14fTFs3ZPbvrAssA13JG6abyFNz27wqlfI=',\n", + " 'Version': '$LATEST',\n", + " 'Environment': {'Variables': {'click_stream_feature_group_name': 'recsys-click-stream-fg-05-29-00-10'}},\n", + " 'TracingConfig': {'Mode': 'PassThrough'},\n", + " 'RevisionId': '68980d47-1acc-48a5-b09e-2d8675919a95',\n", + " 'Layers': [],\n", + " 'State': 'Active',\n", + " 'LastUpdateStatus': 'InProgress',\n", + " 'LastUpdateStatusReason': 'The function is being created.',\n", + " 'LastUpdateStatusReasonCode': 'Creating',\n", + " 'PackageType': 'Zip',\n", + " 'Architectures': ['x86_64'],\n", + " 'EphemeralStorage': {'Size': 512},\n", + " 'SnapStart': {'ApplyOn': 'None', 'OptimizationStatus': 'Off'},\n", + " 'RuntimeVersionConfig': {'RuntimeVersionArn': 'arn:aws:lambda:ap-southeast-2::runtime:f4a0b40874efd83bc0930836198f794b8c0cea2e4e864a3dab58e98fa481131e'},\n", + " 'LoggingConfig': {'LogFormat': 'Text',\n", + " 'LogGroup': '/aws/lambda/click-stream-aggregator-lambda05-29-00-44-52'}}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "lambda_client = boto3.client('lambda')\n", "lambda_client.update_function_configuration(FunctionName=lambda_name,\n", @@ -329,9 +508,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 18, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "KDA output schema: [{'LambdaOutput': {'ResourceARN': 'arn:aws:lambda:ap-southeast-2:XXXXXXXXXXXX:function:click-stream-aggregator-lambda05-29-00-44-52', 'RoleARN': 'arn:aws:iam::XXXXXXXXXXXX:role/service-role/AmazonSageMaker-ExecutionRole-20221031T192874'}, 'Name': 'DESTINATION_SQL_STREAM', 'DestinationSchema': {'RecordFormatType': 'JSON'}}]\n" + ] + } + ], "source": [ "kda_output_schema = [{'LambdaOutput': {'ResourceARN': lambda_function_arn, 'RoleARN': role},\n", " 'Name': 'DESTINATION_SQL_STREAM',\n", @@ -348,9 +537,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 19, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "READY\n" + ] + } + ], "source": [ "creating_app = False\n", "while not creating_app:\n", @@ -376,9 +575,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 20, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResponseMetadata': {'RequestId': 'e9a391a1-b821-4e67-bdfe-ac274f9df484',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': 'e9a391a1-b821-4e67-bdfe-ac274f9df484',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '2',\n", + " 'date': 'Wed, 29 May 2024 00:45:20 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "kda_client.start_application(ApplicationName=kinesis_analytics_application_name,\n", " InputConfigurations=[{'Id': '1.1',\n", @@ -395,9 +613,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 21, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for the Kinesis Application to be in RUNNING state...\n", + "Waiting for the Kinesis Application to be in RUNNING state...\n", + "Waiting for the Kinesis Application to be in RUNNING state...\n", + "RUNNING\n" + ] + } + ], "source": [ "running_app = False\n", "while not running_app:\n", @@ -419,8 +650,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 22, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def generate_click_stream_data(customer_id, product_health_index_low, product_health_index_high):\n", @@ -491,9 +724,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 23, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'event_time': '2024-05-29T00:46:11.125655', 'customer_id': 'C3571', 'product_id': 'P1155', 'product_category': 'baking_ingredients', 'activity_type': 'added_to_cart', 'activity_weight': 2, 'product_health_index': 0.3}\n", + "{'event_time': '2024-05-29T00:46:16.706036', 'customer_id': 'C3571', 'product_id': 'P629', 'product_category': 'packaged_cheese', 'activity_type': 'saved_for_later', 'activity_weight': 2, 'product_health_index': 0.3}\n", + "{'event_time': '2024-05-29T00:46:22.111948', 'customer_id': 'C3571', 'product_id': 'P13123', 'product_category': 'baking_ingredients', 'activity_type': 'liked', 'activity_weight': 1, 'product_health_index': 0.3}\n", + "{'event_time': '2024-05-29T00:46:27.493559', 'customer_id': 'C3571', 'product_id': 'P14170', 'product_category': 'ice_cream_ice', 'activity_type': 'added_to_wish_list', 'activity_weight': 1, 'product_health_index': 0.1}\n", + "{'event_time': '2024-05-29T00:46:32.890340', 'customer_id': 'C3571', 'product_id': 'P10430', 'product_category': 'chips_pretzels', 'activity_type': 'liked', 'activity_weight': 1, 'product_health_index': 0.2}\n", + "{'event_time': '2024-05-29T00:46:38.264964', 'customer_id': 'C3571', 'product_id': 'P2970', 'product_category': 'chips_pretzels', 'activity_type': 'saved_for_later', 'activity_weight': 2, 'product_health_index': 0.2}\n" + ] + } + ], "source": [ "put_records_in_kinesis_stream(inference_customer_id, 0.1, 0.3)\n", "# It takes 2 minutes for KDA to call lambda to update feature store \n", @@ -510,9 +758,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 24, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Online feature store data for customer id C3571\n", + "Record: {'ResponseMetadata': {'RequestId': '6b91b1a3-af4c-4dcf-a710-d38365702840', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6b91b1a3-af4c-4dcf-a710-d38365702840', 'content-type': 'application/json', 'content-length': '396', 'date': 'Wed, 29 May 2024 00:48:53 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C3571'}, {'FeatureName': 'sum_activity_weight_last_2m', 'ValueAsString': '9'}, {'FeatureName': 'avg_product_health_index_last_2m', 'ValueAsString': '0.23333333333333336'}, {'FeatureName': 'event_time', 'ValueAsString': '1716943709'}]}\n" + ] + } + ], "source": [ "record = featurestore_runtime.get_record(FeatureGroupName=click_stream_feature_group_name,\n", " RecordIdentifierValueAsString=inference_customer_id)\n", @@ -529,14 +788,619 @@ } ], "metadata": { - "instance_type": "ml.t3.medium", + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + "vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], + "instance_type": "ml.m5.large", "interpreter": { "hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322" }, "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3 (Data Science 3.0)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1" }, "language_info": { "codemirror_mode": { @@ -548,7 +1412,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/4_realtime_recommendations.ipynb b/4_realtime_recommendations.ipynb index f731f13..7e90b9f 100644 --- a/4_realtime_recommendations.ipynb +++ b/4_realtime_recommendations.ipynb @@ -34,9 +34,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (2.2.2)\n", + "Requirement already satisfied: numpy>=1.22.4 in /opt/conda/lib/python3.10/site-packages (from pandas) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas) (2.9.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m" + ] + } + ], "source": [ "!pip install --upgrade pandas" ] @@ -50,8 +67,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 41, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import sagemaker\n", @@ -77,8 +96,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 42, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "role = sagemaker.get_execution_role()\n", @@ -92,8 +113,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 43, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "query_results= 'sagemaker-recsys-featurestore-workshop'\n", @@ -109,8 +132,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 45, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "parameters = ps.read()\n", @@ -153,8 +178,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 46, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Make sure model has finished deploying\n", @@ -171,8 +198,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 47, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Pass in our cached data as input to the Collaborative Filtering model\n", @@ -195,9 +224,188 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 48, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcustomer_idproduct_idstateageis_marriedproduct_namepredictions
06C3571P6176maine350pretzel \\\"shells\\\"1.723941
18C3571P11086maine350almonds mini nut-thins cheddar cheese1.670082
22C3571P16823maine350pirouette chocolate fudge creme filled wafers1.640127
312C3571P15430maine350organic pork chop seasoning1.639393
45C3571P6247maine350fruit punch roarin' waters1.587156
57C3571P14539maine350chocolate, organic, unsweetened, 100% cacao1.505779
61C3571P10682maine350mini cakes birthday cake1.501803
74C3571P4152maine350white cheddar bunnies1.464058
813C3571P7822maine350degreaser1.454165
93C3571P5429maine350organic sweet potato puree1.414727
\n", + "
" + ], + "text/plain": [ + " index customer_id product_id state age is_married \\\n", + "0 6 C3571 P6176 maine 35 0 \n", + "1 8 C3571 P11086 maine 35 0 \n", + "2 2 C3571 P16823 maine 35 0 \n", + "3 12 C3571 P15430 maine 35 0 \n", + "4 5 C3571 P6247 maine 35 0 \n", + "5 7 C3571 P14539 maine 35 0 \n", + "6 1 C3571 P10682 maine 35 0 \n", + "7 4 C3571 P4152 maine 35 0 \n", + "8 13 C3571 P7822 maine 35 0 \n", + "9 3 C3571 P5429 maine 35 0 \n", + "\n", + " product_name predictions \n", + "0 pretzel \\\"shells\\\" 1.723941 \n", + "1 almonds mini nut-thins cheddar cheese 1.670082 \n", + "2 pirouette chocolate fudge creme filled wafers 1.640127 \n", + "3 organic pork chop seasoning 1.639393 \n", + "4 fruit punch roarin' waters 1.587156 \n", + "5 chocolate, organic, unsweetened, 100% cacao 1.505779 \n", + "6 mini cakes birthday cake 1.501803 \n", + "7 white cheddar bunnies 1.464058 \n", + "8 degreaser 1.454165 \n", + "9 organic sweet potato puree 1.414727 " + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cf_inference_df" ] @@ -225,8 +433,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 49, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Make sure model has finished deploying\n", @@ -249,8 +459,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 50, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "query = f'''\n", @@ -265,7 +477,11 @@ "df_one_hot_cat_features = pd.DataFrame(one_hot_cat_features)\n", "df_one_hot_cat_features.columns = ['product_category']\n", "\n", - "df_one_hot_cat_features = pd.concat([df_one_hot_cat_features, pd.get_dummies(df_one_hot_cat_features['product_category'], prefix='cat')],axis=1)" + "df_one_hot_cat_features = pd.concat([df_one_hot_cat_features, pd.get_dummies(df_one_hot_cat_features['product_category'], prefix='cat')],axis=1)\n", + "\n", + "# Convert dummy variables to integers\n", + "for col in df_one_hot_cat_features.columns[1:]:\n", + " df_one_hot_cat_features[col] = df_one_hot_cat_features[col].astype(int)" ] }, { @@ -282,8 +498,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 51, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def get_ranking_model_input_data(df, df_one_hot_cat_features):\n", @@ -295,7 +513,7 @@ " customer_record = featurestore_runtime.get_record(FeatureGroupName=customers_feature_group_name,\n", " RecordIdentifierValueAsString=customer_id,\n", " FeatureNames=['customer_health_index'])\n", - " \n", + " print(customer_record)\n", " customer_health_index = customer_record['Record'][0]['ValueAsString']\n", " \n", " # Get product features (instead of looping, you can optionally use\n", @@ -323,6 +541,7 @@ " RecordIdentifierValueAsString=customer_id,\n", " FeatureNames=['sum_activity_weight_last_2m',\n", " 'avg_product_health_index_last_2m'])\n", + " print(click_stream_record)\n", " \n", " # Calculate healthy_activity_last_2m as this will influence ranking as well\n", " sum_activity_weight_last_2m = click_stream_record['Record'][0]['ValueAsString']\n", @@ -365,9 +584,188 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 52, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcustomer_idproduct_idstateageis_marriedproduct_namepredictions
06C3571P6176maine350pretzel \\\"shells\\\"1.723941
18C3571P11086maine350almonds mini nut-thins cheddar cheese1.670082
22C3571P16823maine350pirouette chocolate fudge creme filled wafers1.640127
312C3571P15430maine350organic pork chop seasoning1.639393
45C3571P6247maine350fruit punch roarin' waters1.587156
57C3571P14539maine350chocolate, organic, unsweetened, 100% cacao1.505779
61C3571P10682maine350mini cakes birthday cake1.501803
74C3571P4152maine350white cheddar bunnies1.464058
813C3571P7822maine350degreaser1.454165
93C3571P5429maine350organic sweet potato puree1.414727
\n", + "
" + ], + "text/plain": [ + " index customer_id product_id state age is_married \\\n", + "0 6 C3571 P6176 maine 35 0 \n", + "1 8 C3571 P11086 maine 35 0 \n", + "2 2 C3571 P16823 maine 35 0 \n", + "3 12 C3571 P15430 maine 35 0 \n", + "4 5 C3571 P6247 maine 35 0 \n", + "5 7 C3571 P14539 maine 35 0 \n", + "6 1 C3571 P10682 maine 35 0 \n", + "7 4 C3571 P4152 maine 35 0 \n", + "8 13 C3571 P7822 maine 35 0 \n", + "9 3 C3571 P5429 maine 35 0 \n", + "\n", + " product_name predictions \n", + "0 pretzel \\\"shells\\\" 1.723941 \n", + "1 almonds mini nut-thins cheddar cheese 1.670082 \n", + "2 pirouette chocolate fudge creme filled wafers 1.640127 \n", + "3 organic pork chop seasoning 1.639393 \n", + "4 fruit punch roarin' waters 1.587156 \n", + "5 chocolate, organic, unsweetened, 100% cacao 1.505779 \n", + "6 mini cakes birthday cake 1.501803 \n", + "7 white cheddar bunnies 1.464058 \n", + "8 degreaser 1.454165 \n", + "9 organic sweet potato puree 1.414727 " + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cf_inference_df" ] @@ -381,15 +779,39 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 53, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ResponseMetadata': {'RequestId': 'a8a788d0-13d3-4a21-9e04-5fd505a2cf3a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a8a788d0-13d3-4a21-9e04-5fd505a2cf3a', 'content-type': 'application/json', 'content-length': '131', 'date': 'Wed, 29 May 2024 00:49:52 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'customer_health_index', 'ValueAsString': '0.0952770902420399'}]}\n", + "{'ResponseMetadata': {'RequestId': 'b80bfb05-4a85-46a2-9e3a-97698adfd24d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'b80bfb05-4a85-46a2-9e3a-97698adfd24d', 'content-type': 'application/json', 'content-length': '234', 'date': 'Wed, 29 May 2024 00:49:53 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'sum_activity_weight_last_2m', 'ValueAsString': '9'}, {'FeatureName': 'avg_product_health_index_last_2m', 'ValueAsString': '0.23333333333333336'}]}\n" + ] + } + ], "source": [ "# Construct input data for the ranking model\n", - "ranking_inference_df = get_ranking_model_input_data(cf_inference_df, df_one_hot_cat_features)\n", - "\n", + "ranking_inference_df = get_ranking_model_input_data(cf_inference_df, df_one_hot_cat_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ "# Get our ranked product recommendations and attach the predictions to the model input\n", - "ranking_inference_df['propensity_to_buy'] = ranking_model_predictor.predict(ranking_inference_df.to_numpy()).decode('utf-8').split(',')" + "# ranking_inference_df['propensity_to_buy'] = ranking_model_predictor.predict(ranking_inference_df.to_numpy()).decode('utf-8').split('\\n')\n", + "predictions = ranking_model_predictor.predict(ranking_inference_df.to_numpy()).decode('utf-8').split('\\n')\n", + "predictions = [float(p) for p in predictions if p != ''] # Convert to float and remove empty strings\n", + "\n", + "ranking_inference_df['propensity_to_buy'] = predictions\n" ] }, { @@ -401,16 +823,126 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idproduct_namepropensity_to_buy
0P6176pretzel \\\"shells\\\"0.999014
1P11086almonds mini nut-thins cheddar cheese0.999014
2P16823pirouette chocolate fudge creme filled wafers0.999014
3P6247fruit punch roarin' waters0.999014
4P4152white cheddar bunnies0.999014
5P10682mini cakes birthday cake0.999014
6P5429organic sweet potato puree0.998438
7P14539chocolate, organic, unsweetened, 100% cacao0.998438
8P15430organic pork chop seasoning0.439611
9P7822degreaser0.439611
\n", + "
" + ], + "text/plain": [ + " product_id product_name propensity_to_buy\n", + "0 P6176 pretzel \\\"shells\\\" 0.999014\n", + "1 P11086 almonds mini nut-thins cheddar cheese 0.999014\n", + "2 P16823 pirouette chocolate fudge creme filled wafers 0.999014\n", + "3 P6247 fruit punch roarin' waters 0.999014\n", + "4 P4152 white cheddar bunnies 0.999014\n", + "5 P10682 mini cakes birthday cake 0.999014\n", + "6 P5429 organic sweet potato puree 0.998438\n", + "7 P14539 chocolate, organic, unsweetened, 100% cacao 0.998438\n", + "8 P15430 organic pork chop seasoning 0.439611\n", + "9 P7822 degreaser 0.439611" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Join all the data back together for inspection\n", "personalized_recommendations = pd.concat([cf_inference_df[['customer_id', 'product_id', 'product_name']],\n", " ranking_inference_df[['propensity_to_buy']]], axis=1)\n", "\n", "# And sort by propensity to buy\n", - "personalized_recommendations.sort_values(by='propensity_to_buy', ascending=False)[['product_id','product_name']].reset_index(drop=True).head(5)" + "personalized_recommendations.sort_values(by='propensity_to_buy', ascending=False)[['product_id','product_name', 'propensity_to_buy']].reset_index(drop=True).head(10)" ] }, { @@ -459,14 +991,619 @@ } ], "metadata": { - "instance_type": "ml.t3.medium", + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + "vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], + "instance_type": "ml.m5.large", "interpreter": { "hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322" }, "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3 (Data Science 3.0)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1" }, "language_info": { "codemirror_mode": { @@ -478,7 +1615,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/5_cleanup.ipynb b/5_cleanup.ipynb index 4ad990b..f232bbd 100644 --- a/5_cleanup.ipynb +++ b/5_cleanup.ipynb @@ -27,8 +27,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 29, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from parameter_store import ParameterStore\n", @@ -44,8 +46,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "role = sagemaker.get_execution_role()\n", @@ -68,8 +72,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 31, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "parameters = ps.read()\n", @@ -96,9 +102,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 32, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleting feature group: recsys-customers-fg-05-28-23-12\n", + "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-customers-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n", + "Waiting for Feature Group Deletion\n", + "Deleting feature group: recsys-products-fg-05-28-23-12\n", + "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-products-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n", + "Waiting for Feature Group Deletion\n", + "Deleting feature group: recsys-orders-fg-05-28-23-12\n", + "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-orders-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n", + "Waiting for Feature Group Deletion\n", + "Deleting feature group: recsys-click-stream-historical-fg-05-28-23-12\n", + "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-click-stream-historical-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n", + "Waiting for Feature Group Deletion\n", + "Deleting feature group: recsys-click-stream-fg-05-28-23-12\n", + "Deleting all s3 objects in prefix: recsys-feature-store/XXXXXXXXXXXX/sagemaker/ap-southeast-2/offline-store/recsys-click-stream-fg-05-28-23-12 in bucket sagemaker-ap-southeast-2-XXXXXXXXXXXX\n", + "Waiting for Feature Group Deletion\n" + ] + } + ], "source": [ "feature_group_list = [customers_feature_group_name, products_feature_group_name,\n", " orders_feature_group_name, click_stream_historical_feature_group_name,\n", @@ -118,8 +148,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 33, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def clean_up_endpoint(endpoint_name):\n", @@ -133,9 +165,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 34, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleting endpoint: recsys-cf-model-05-28-23-35\n", + "Deleting endpoint configuration : recsys-cf-model-05-28-23-35\n", + "Deleting endpoint: recsys-rerank-model-05-28-23-35\n", + "Deleting endpoint configuration : recsys-rerank-model-05-28-23-35\n" + ] + } + ], "source": [ "endpoint_list = [cf_model_endpoint_name, ranking_model_endpoint_name]\n", "\n", @@ -152,9 +197,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 35, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResponseMetadata': {'RequestId': 'f78be708-b4a7-d2de-a95b-934960b27d21',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': 'f78be708-b4a7-d2de-a95b-934960b27d21',\n", + " 'x-amz-id-2': 'XP2LB+4JbsvKcU5fzgjjolKNwu/VscIteAKJME7nnFnYkN/XL9mdTJo8RFXmZu0APzfUXz7F0myVLYKFb1SWXj8qrvucOao8',\n", + " 'date': 'Wed, 29 May 2024 00:08:33 GMT',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '0',\n", + " 'connection': 'keep-alive'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "kinesis_client.delete_stream(StreamName=kinesis_stream_name,\n", " EnforceConsumerDeletion=True)" @@ -169,9 +235,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 36, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResponseMetadata': {'RequestId': '334ef27e-7207-485b-bb2b-8b97a99da0f8',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '334ef27e-7207-485b-bb2b-8b97a99da0f8',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '2',\n", + " 'date': 'Wed, 29 May 2024 00:08:33 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "response = kinesis_analytics_client.describe_application(ApplicationName=kinesis_analytics_application_name)\n", "create_ts = response['ApplicationDetail']['CreateTimestamp']\n", @@ -187,14 +272,619 @@ } ], "metadata": { + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + "vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], "instance_type": "ml.t3.medium", "interpreter": { "hash": "fea7262dfaa662dc7ea8f1b256cf975fd886d2f868152164ef15877318a1e322" }, "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3 (Data Science 3.0)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ca-central-1:310906938811:image/datascience-1.0" + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1" }, "language_info": { "codemirror_mode": { @@ -206,7 +896,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.10.6" } }, "nbformat": 4,