diff --git a/build.sbt b/build.sbt index d85f99016..2919ddae6 100644 --- a/build.sbt +++ b/build.sbt @@ -2,7 +2,7 @@ import sbt.Keys.publishLocalConfiguration ThisBuild / resolvers += Resolver.mavenLocal ThisBuild / scalaVersion := "2.12.15" -ThisBuild / version := "0.8.2" +ThisBuild / version := "0.8.0" ThisBuild / organization := "com.linkedin.feathr" ThisBuild / organizationName := "linkedin" val sparkVersion = "3.1.3" diff --git a/docs/concepts/feature-registry.md b/docs/concepts/feature-registry.md index 9bc00b275..112fa1c4b 100644 --- a/docs/concepts/feature-registry.md +++ b/docs/concepts/feature-registry.md @@ -74,6 +74,8 @@ client.register_features() all_features = client.list_registered_features(project_name=client.project_name) ``` +Please avoid giving the same name to different features under the same project, since this will be treated as updating an existing project, which is not supported by Feathr and will cause errors. + ### Reuse Features from Existing Registry The feature producers can just let the feature consumers know which features exist so the feature consumers can reuse them. For feature consumers, they can reuse existing features from the registry. The whole project can be retrieved to local environment by calling this API `client.get_features_from_registry` with a project name. This encourage feature reuse across organizations. For example, end users of a feature just need to read all feature definitions from the existing projects, then use a few features from the projects and join those features with a new dataset you have. diff --git a/docs/dev_guide/feathr_overall_release_guide.md b/docs/dev_guide/feathr_overall_release_guide.md index 069f6edf4..0174c8dae 100644 --- a/docs/dev_guide/feathr_overall_release_guide.md +++ b/docs/dev_guide/feathr_overall_release_guide.md @@ -31,11 +31,11 @@ Read through the [commit log](https://github.com/feathr-ai/feathr/commits/main) ## Code Changes Before the release is made, the version needs to be updated in following places - [build.sbt](https://github.com/feathr-ai/feathr/blob/main/build.sbt#L3) - For Maven release version -- [setup.py](https://github.com/feathr-ai/feathr/blob/main/feathr_project/setup.py#L10) - For PyPi release version +- [version.py](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathr/version.py#L1) - For Feathr version - [conf.py](https://github.com/feathr-ai/feathr/blob/main/feathr_project/docs/conf.py#L27) - For documentation version -- [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/test/test_user_workspace/feathr_config.yaml#L84) - To set the spark runtime location for Azure Synapse and Azure Databricks used by test suite. -- [constants.py](https://github.com/feathr-ai/feathr/blob/73656fe4a57219e99ff6fede10d51a000ae90fa1/feathr_project/feathr/constants.py#L31) - To set the default maven artifact version +- [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/test/test_user_workspace/feathr_config.yaml#L84) - To set the spark runtime location for Azure Synapse and Azure Databricks used by test suite. Please update all .yaml files under this path. - [azure_resource_provision.json](https://github.com/feathr-ai/feathr/blob/main/docs/how-to-guides/azure_resource_provision.json#L114) - To set the deployment template to pull the latest release image. 
+- [constants.py](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathr/constants.py#L31) - To set the default maven artifact version (Only needed when maven version is **NOT** the same as python sdk version) ## Triggering automated release pipelines Our goal is to automate the release process as much as possible. So far, we have automated the following steps diff --git a/docs/how-to-guides/azure_resource_provision.json b/docs/how-to-guides/azure_resource_provision.json index 6ab77a419..03d175052 100644 --- a/docs/how-to-guides/azure_resource_provision.json +++ b/docs/how-to-guides/azure_resource_provision.json @@ -111,7 +111,7 @@ "destinationBacpacBlobUrl": "[concat('https://',variables('dlsName'),'.blob.core.windows.net/',variables('dlsFsName'),'/',variables('bacpacBlobName'))]", "bacpacDeploymentScriptName": "CopyBacpacFile", "bacpacDbExtensionName": "registryRbacDbImport", - "preBuiltdockerImage": "feathrfeaturestore/feathr-registry:releases-v0.8.2" + "preBuiltdockerImage": "feathrfeaturestore/feathr-registry:releases-v0.8.0" }, "functions": [], "resources": [ diff --git a/docs/how-to-guides/feathr-configuration-and-env.md b/docs/how-to-guides/feathr-configuration-and-env.md index 7fc5e8c3b..b8745b332 100644 --- a/docs/how-to-guides/feathr-configuration-and-env.md +++ b/docs/how-to-guides/feathr-configuration-and-env.md @@ -116,7 +116,7 @@ feathr_client = FeathrClient(..., secret_manager_client = cache) | SPARK_CONFIG__SPARK_RESULT_OUTPUT_PARTS | Configure number of parts for the spark output for feature generation job | Required | | SPARK_CONFIG__AZURE_SYNAPSE__DEV_URL | Dev URL to the synapse cluster. Usually it's something like `https://yourclustername.dev.azuresynapse.net` | Required if using Azure Synapse | | SPARK_CONFIG__AZURE_SYNAPSE__POOL_NAME | name of the spark pool that you are going to use | Required if using Azure Synapse | -| SPARK_CONFIG__AZURE_SYNAPSE__WORKSPACE_DIR | A location that Synapse has access to. This workspace dir stores all the required configuration files and the jar resources. All the feature definitions will be uploaded here | Required if using Azure Synapse | +| SPARK_CONFIG__AZURE_SYNAPSE__WORKSPACE_DIR | A location that Synapse has access to. This workspace dir stores all the required configuration files and the jar resources. All the feature definitions will be uploaded here. We suggest using an empty directory for a new Spark job to avoid conflicts. | Required if using Azure Synapse | | SPARK_CONFIG__AZURE_SYNAPSE__EXECUTOR_SIZE | Specifies the executor size for the Azure Synapse cluster. Currently the options are `Small`, `Medium`, `Large`. | Required if using Azure Synapse | | SPARK_CONFIG__AZURE_SYNAPSE__EXECUTOR_NUM | Specifies the number of executors for the Azure Synapse cluster | Required if using Azure Synapse | | SPARK_CONFIG__AZURE_SYNAPSE__FEATHR_RUNTIME_LOCATION | Specifies the Feathr runtime location. Support local paths, path start with `http(s)://`, and paths start with `abfss:/`. If not set, will use the [Feathr package published in Maven](https://search.maven.org/artifact/com.linkedin.feathr/feathr_2.12). 
| Required if using Azure Synapse | diff --git a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/azure_synapse/product_recommendation_demo.ipynb similarity index 69% rename from docs/samples/product_recommendation_demo.ipynb rename to docs/samples/azure_synapse/product_recommendation_demo.ipynb index 44febb062..e93860269 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/azure_synapse/product_recommendation_demo.ipynb @@ -4,26 +4,32 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Demo Notebook: Feathr Feature Store on Azure\n", + "# Product Recommendation with Feathr on Azure\n", "\n", "This notebook demonstrates how Feathr Feature Store can simplify and empower your model training and inference. You will learn:\n", "\n", "1. Define sharable features using Feathr API\n", - "2. Create a training dataset via point-in-time feature join with Feathr API\n", - "3. Materialize features to online store and then retrieve them with Feathr API" + "2. Register features with the register API\n", + "3. Create a training dataset via point-in-time feature join with Feathr API\n", + "4. Materialize features to online store and then retrieve them with Feathr API\n", + "\n", + "In this tutorial, we use Feathr to create a model that predicts users' product rating. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisite: Use Quick Start Template to Provision Azure Resources\n", + "## 1. Prerequisite: Use Azure Resource Manager (ARM) to Provision Azure Resources\n", "\n", "First step is to provision required cloud resources if you want to use Feathr. Feathr provides a python based client to interact with cloud resources.\n", "\n", - "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/feathr-ai/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script. \n", + "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. This will create a new resource group and deploy the needed Azure resources in it. \n", + "\n", + "If you already have an existing resource group and only want to install a few resources manually, you can refer to the CLI documentation [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html). It provides CLI commands to install the needed resources. \n", + "**Please Note: The CLI documentation is for advanced users, since there are a lot of configurations and role assignments that would have to be done manually, so it won't work out of the box and should just be used for reference. 
ARM template is the preferred way to deploy.**\n", "\n", + "The below architecture diagram represents how different resources interact with each other\n", "![Architecture](https://github.com/feathr-ai/feathr/blob/main/docs/images/architecture.png?raw=true)" ] }, @@ -31,11 +37,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisite: Install Feathr and Import Dependencies\n", - "\n", - "Install Feathr using pip:\n", + "## 2. Prerequisite: Login to Azure and Install Feathr\n", "\n", - "`pip install -U feathr pandavro scikit-learn`" + "Login to Azure with a device code (You will see instructions in the output once you execute the cell):" ] }, { @@ -44,53 +48,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Import Dependencies\n", - "import glob\n", - "import os\n", - "import tempfile\n", - "from datetime import datetime, timedelta\n", - "from math import sqrt\n", - "\n", - "import pandas as pd\n", - "import pandavro as pdx\n", - "from feathr import FeathrClient\n", - "from feathr import BOOLEAN, FLOAT, INT32, ValueType\n", - "from feathr import Feature, DerivedFeature, FeatureAnchor\n", - "from feathr import BackfillTime, MaterializationSettings\n", - "from feathr import FeatureQuery, ObservationSettings\n", - "from feathr import RedisSink\n", - "from feathr import INPUT_CONTEXT, HdfsSource\n", - "from feathr import WindowAggTransformation\n", - "from feathr import TypedKey\n", - "from sklearn.metrics import mean_squared_error\n", - "from sklearn.model_selection import train_test_split\n", - "from azure.identity import DefaultAzureCredential\n", - "from azure.keyvault.secrets import SecretClient" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisite: Configure the required environment with Feathr Quick Start Template\n", - "\n", - "In the first step (Provision cloud resources), you should have provisioned all the required cloud resources. Run the code below to install Feathr, login to Azure to get the required credentials to access more cloud resources." + "! az login --use-device-code" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**REQUIRED STEP: Fill in the resource prefix when provisioning the resources**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "resource_prefix = \"feathr_resource_prefix\"" + "Install Feathr and dependencies to run this notebook." ] }, { @@ -99,14 +64,14 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install feathr azure-cli pandavro scikit-learn" + "%pip install -U feathr pandavro scikit-learn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Login to Azure with a device code (You will see instructions in the output):" + "Import Dependencies to make sure everything is installed correctly" ] }, { @@ -115,7 +80,27 @@ "metadata": {}, "outputs": [], "source": [ - "! 
az login --use-device-code" + "import glob\n", + "import os\n", + "import tempfile\n", + "from datetime import datetime, timedelta\n", + "from math import sqrt\n", + "\n", + "import pandas as pd\n", + "import pandavro as pdx\n", + "from feathr import FeathrClient\n", + "from feathr import BOOLEAN, FLOAT, INT32, ValueType\n", + "from feathr import Feature, DerivedFeature, FeatureAnchor\n", + "from feathr import BackfillTime, MaterializationSettings\n", + "from feathr import FeatureQuery, ObservationSettings\n", + "from feathr import RedisSink\n", + "from feathr import INPUT_CONTEXT, HdfsSource\n", + "from feathr import WindowAggTransformation\n", + "from feathr import TypedKey\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azure.identity import AzureCliCredential\n", + "from azure.keyvault.secrets import SecretClient" ] }, { @@ -123,20 +108,19 @@ "metadata": {}, "source": [ "\n", - "**Permission**\n", + "## 3. Prerequisite: Set the required permissions\n", "\n", - "To proceed with the following steps, you may need additional permission: permission to access the keyvault, permission to access the Storage Blob as a Contributor and permission to submit jobs to Synapse cluster. Skip this step if you have already given yourself the access. Otherwise, run the following lines of command in the Cloud Shell before running the cell below.\n", + "Before you proceed further, you would need additional permissions: permission to access the keyvault, permission to access the Storage Blob as a Contributor and permission to submit jobs to Synapse cluster. Run the following lines of command in the [Cloud Shell](https://shell.azure.com) before running the cells below. Please replace the resource_prefix with the prefix you used in ARM template deployment.\n", "\n", "```\n", - "userId=\n", - "resource_prefix=\n", - "synapse_workspace_name=\"${resource_prefix}syws\"\n", - "keyvault_name=\"${resource_prefix}kv\"\n", - "objectId=$(az ad user show --id $userId --query id -o tsv)\n", - "az keyvault update --name $keyvault_name --enable-rbac-authorization false\n", - "az keyvault set-policy -n $keyvault_name --secret-permissions get list --object-id $objectId\n", - "az role assignment create --assignee $userId --role \"Storage Blob Data Contributor\"\n", - "az synapse role assignment create --workspace-name $synapse_workspace_name --role \"Synapse Contributor\" --assignee $userId\n", + " resource_prefix=\"YOUR_RESOURCE_PREFIX\"\n", + " synapse_workspace_name=\"${resource_prefix}syws\"\n", + " keyvault_name=\"${resource_prefix}kv\"\n", + " objectId=$(az ad signed-in-user show --query id -o tsv)\n", + " az keyvault update --name $keyvault_name --enable-rbac-authorization false\n", + " az keyvault set-policy -n $keyvault_name --secret-permissions get list --object-id $objectId\n", + " az role assignment create --assignee $objectId --role \"Storage Blob Data Contributor\"\n", + " az synapse role assignment create --workspace-name $synapse_workspace_name --role \"Synapse Contributor\" --assignee $objectId\n", "```\n" ] }, @@ -144,7 +128,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Get all the required credentials from Azure KeyVault" + "## 4. Prerequisite: Feathr Configuration\n", + "\n", + "### Setting the environment variables\n", + "Set the environment variables that will be used by Feathr as configuration. 
Feathr supports configuration via enviroment variables and yaml, you can read more about it [here](https://feathr-ai.github.io/feathr/how-to-guides/feathr-configuration-and-env.html).\n", + "\n", + "**Fill in the `resource_prefix` that you used while provisioning the resources in Step 1 using ARM.**" ] }, { @@ -153,44 +142,49 @@ "metadata": {}, "outputs": [], "source": [ - "# Get all the required credentials from Azure Key Vault\n", - "key_vault_name=resource_prefix+\"kv\"\n", - "synapse_workspace_url=resource_prefix+\"syws\"\n", - "adls_account=resource_prefix+\"dls\"\n", - "adls_fs_name=resource_prefix+\"fs\"\n", - "purview_name=resource_prefix+\"purview\"\n", - "key_vault_uri = f\"https://{key_vault_name}.vault.azure.net\"\n", - "credential = DefaultAzureCredential(exclude_interactive_browser_credential=False)\n", - "client = SecretClient(vault_url=key_vault_uri, credential=credential)\n", - "secretName = \"FEATHR-ONLINE-STORE-CONN\"\n", - "retrieved_secret = client.get_secret(secretName).value\n", - "\n", - "# Get redis credentials; This is to parse Redis connection string.\n", - "redis_port=retrieved_secret.split(',')[0].split(\":\")[1]\n", - "redis_host=retrieved_secret.split(',')[0].split(\":\")[0]\n", - "redis_password=retrieved_secret.split(',')[1].split(\"password=\",1)[1]\n", - "redis_ssl=retrieved_secret.split(',')[2].split(\"ssl=\",1)[1]\n", - "\n", - "# Set the resource link\n", - "os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_url}.dev.azuresynapse.net'\n", - "os.environ['spark_config__azure_synapse__pool_name'] = 'spark31'\n", - "os.environ['spark_config__azure_synapse__workspace_dir'] = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_project'\n", - "os.environ['online_store__redis__host'] = redis_host\n", - "os.environ['online_store__redis__port'] = redis_port\n", - "os.environ['online_store__redis__ssl_enabled'] = redis_ssl\n", - "os.environ['REDIS_PASSWORD']=redis_password\n", - "feathr_output_path = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_output'" + "RESOURCE_PREFIX = \"YOUR_RESOURCE_PREFIX\" # from ARM deployment in Step 1\n", + "FEATHR_PROJECT_NAME=\"YOUR_PROJECT_NAME\" # provide a unique name" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Prerequisite: Configure the required environment (Skip this step if using the above Quick Start Template)\n", "\n", - "In the first step (Provision cloud resources), you should have provisioned all the required cloud resources. If you use Feathr CLI to create a workspace, you should have a folder with a file called `feathr_config.yaml` in it with all the required configurations. Otherwise, update the configuration below.\n", + "# Get name for deployed resources using the resource prefix\n", + "KEY_VAULT_NAME=f\"{RESOURCE_PREFIX}kv\"\n", + "SYNAPSE_WORKSPACE_NAME=f\"{RESOURCE_PREFIX}syws\"\n", + "ADLS_ACCOUNT=f\"{RESOURCE_PREFIX}dls\"\n", + "ADLS_FS_NAME=f\"{RESOURCE_PREFIX}fs\"\n", + "KEY_VAULT_URI = f\"https://{KEY_VAULT_NAME}.vault.azure.net\"\n", + "FEATHR_API_APP = f\"{RESOURCE_PREFIX}webapp\"\n", + "\n", "\n", - "The code below will write this configuration string to a temporary location and load it to Feathr. Please still refer to [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) and use that as the source of truth. 
It should also have more explanations on the meaning of each variable." + "# Getting the credential object for Key Vault client\n", + "credential = AzureCliCredential()\n", + "client = SecretClient(vault_url=KEY_VAULT_URI, credential=credential)\n", + "\n", + "# Getting Redis store's connection string.\n", + "retrieved_secret = client.get_secret(\"FEATHR-ONLINE-STORE-CONN\").value\n", + "\n", + "# Parse Redis connection string\n", + "REDIS_PORT=retrieved_secret.split(',')[0].split(\":\")[1]\n", + "REDIS_HOST=retrieved_secret.split(',')[0].split(\":\")[0]\n", + "REDIS_PASSWORD=retrieved_secret.split(',')[1].split(\"password=\",1)[1]\n", + "REDIS_SSL=retrieved_secret.split(',')[2].split(\"ssl=\",1)[1]\n", + "# Set password as environment variable.\n", + "os.environ['REDIS_PASSWORD']=REDIS_PASSWORD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Write the configuration as yaml file.\n", + "\n", + "The code below will write this configuration string to a temporary location and load it to Feathr. Please refer to [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) for full list of configuration options and details about them." ] }, { @@ -200,68 +194,38 @@ "outputs": [], "source": [ "import tempfile\n", - "yaml_config = \"\"\"\n", - "# Please refer to https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml for explanations on the meaning of each field.\n", + "yaml_config = f\"\"\"\n", "api_version: 1\n", "project_config:\n", - " project_name: 'feathr_getting_started'\n", - " required_environment_variables:\n", - " - 'REDIS_PASSWORD'\n", + " project_name: '{FEATHR_PROJECT_NAME}'\n", "offline_store:\n", "# Please set 'enabled' flags as true (false by default) if any of items under the same paths are expected to be visited\n", " adls:\n", " adls_enabled: true\n", " wasb:\n", " wasb_enabled: true\n", - " s3:\n", - " s3_enabled: false\n", - " s3_endpoint: 's3.amazonaws.com'\n", - " jdbc:\n", - " jdbc_enabled: false\n", - " jdbc_database: 'feathrtestdb'\n", - " jdbc_table: 'feathrtesttable'\n", - " snowflake:\n", - " snowflake_enabled: false\n", - " url: \".snowflakecomputing.com\"\n", - " user: \"\"\n", - " role: \"\"\n", "spark_config:\n", " spark_cluster: 'azure_synapse'\n", " spark_result_output_parts: '1'\n", " azure_synapse:\n", - " dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net'\n", - " pool_name: 'spark3'\n", - " workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_getting_started'\n", + " dev_url: 'https://{SYNAPSE_WORKSPACE_NAME}.dev.azuresynapse.net'\n", + " pool_name: 'spark31'\n", + " workspace_dir: 'abfss://{ADLS_FS_NAME}@{ADLS_ACCOUNT}.dfs.core.windows.net/feathr_project'\n", " executor_size: 'Small'\n", " executor_num: 1\n", - " databricks:\n", - " workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net'\n", - " config_template: {'run_name':'','new_cluster':{'spark_version':'9.1.x-scala2.12','node_type_id':'Standard_D3_v2','num_workers':2,'spark_conf':{}},'libraries':[{'jar':''}],'spark_jar_task':{'main_class_name':'','parameters':['']}}\n", - " work_dir: 'dbfs:/feathr_getting_started'\n", "online_store:\n", " redis:\n", - " host: 'feathrazuretest3redis.redis.cache.windows.net'\n", - " port: 6380\n", - " ssl_enabled: True\n", + " host: '{REDIS_HOST}'\n", + " port: {REDIS_PORT}\n", + " ssl_enabled: {REDIS_SSL}\n", 
"feature_registry:\n", - " api_endpoint: \"https://feathr-sql-registry.azurewebsites.net/api/v1\"\n", + " api_endpoint: 'https://{FEATHR_API_APP}.azurewebsites.net/api/v1'\n", "\"\"\"\n", + "\n", "tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)\n", "with open(tmp.name, \"w\") as text_file:\n", " text_file.write(yaml_config)\n", - "feathr_output_path = f'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_output'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisite: Setup necessary environment variables (Skip this step if using the above Quick Start Template)\n", - "\n", - "You should setup the environment variables in order to run this sample. More environment variables can be set by referring to [feathr_config.yaml](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) and use that as the source of truth. It also has more explanations on the meaning of each variable.\n", - "\n", - "To run this notebook, for Azure users, you need REDIS_PASSWORD.\n", - "To run this notebook, for Databricks useres, you need DATABRICKS_WORKSPACE_TOKEN_VALUE and REDIS_PASSWORD." + "feathr_output_path = f'abfss://{ADLS_FS_NAME}@{ADLS_ACCOUNT}.dfs.core.windows.net/feathr_output'" ] }, { @@ -270,10 +234,10 @@ "source": [ "# Define sharable features using Feathr API\n", "\n", - "In this tutorial, we use Feathr Feature Store to help create a model that predicts users product rating. To make it simple, let's just predict users' rating for ONE product for an e-commerce website. (We have an [advanced demo](./product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", + "In this tutorial, we use Feathr Feature Store and create a model that predicts users' product rating. To make it simple, let's just predict users' rating for ONE product for an e-commerce website. (We have an [advanced demo](../product_recommendation_demo_advanced.ipynb) that predicts ratings for arbitrary products.)\n", "\n", "\n", - "## Initialize Feathr Client\n", + "### Initialize Feathr Client\n", "\n", "Let's initialize a Feathr client first. The Feathr client provides all the APIs we need to interact with Feathr Feature Store." ] @@ -291,7 +255,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Understand the Raw Datasets\n", + "### Understand the Raw Datasets\n", "We have 3 raw datasets to work with: one observation dataset(a.k.a. label dataset) and two raw datasets to generate features." ] }, @@ -305,6 +269,7 @@ "# Observation dataset usually comes with a event_timestamp to denote when the observation happened.\n", "# The label here is product_rating. Our model objective is to predict a user's rating for this product.\n", "import pandas as pd\n", + "# Public URL hosting mock data\n", "pd.read_csv(\"https://azurefeathrstorage.blob.core.windows.net/public/sample_data/product_recommendation_sample/user_observation_mock_data.csv\")" ] }, @@ -346,7 +311,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## What's a Feature in Feathr\n", + "### What's a Feature in Feathr\n", "A feature is an individual measurable property or characteristic of a phenomenon which is sometimes time-sensitive. \n", "\n", "In Feathr, feature can be defined by the following characteristics:\n", @@ -360,7 +325,7 @@ "1. Feature source: what source data that this feature is based on\n", "2. Transformation: what transformation is used to transform the source data into feature. 
Transformation can be optional when you just want to take a column out from the source data.\n", "\n", - "(For more details on feature definition, please refer to the [Feathr Feature Definition Guide](https://github.com/feathr-ai/feathr/blob/main/docs/concepts/feature-definition.md))" + "(For more details on feature definition, please refer to the [Feathr Feature Definition Guide](https://feathr-ai.github.io/feathr/concepts/feature-definition.html))" ] }, { @@ -456,9 +421,7 @@ "source": [ "### Window aggregation features\n", "\n", - "Using [window aggregations](https://en.wikipedia.org/wiki/Window_function_%28SQL%29) can help us create more powerful features. A window aggregation feature compress large amount of information into one single feature value. Using our raw data as an example, we have the users' purchase history data that might be quite some rows, we want to create a window aggregation feature that represents their last 90 days of average purcahse amount.\n", - "\n", - "Feathr provides a nice API to help us create such window aggregation features.\n", + "Using [window aggregations](https://en.wikipedia.org/wiki/Window_function_%28SQL%29) can help us create more powerful features. A window aggregation feature compresses large amount of information into one single feature value. Using our raw data as an example, we have the user's purchase history data that might be quite some rows, we want to create a window aggregation feature that represents their last 90 days of average purchase amount.\n", "\n", "To create this window aggregation feature via Feathr, we just need to define the following parameters with `WindowAggTransformation` API:\n", "1. `agg_expr`: the field/column you want to aggregate. It can be a ANSI SQL expression. So we just write `cast_float(purchase_amount)`(the raw data might be in string form, let's cast_float).\n", @@ -509,9 +472,7 @@ "### Derived Features Section\n", "Derived features are features that are computed from other Feathr features. They could be computed from anchored features, or other derived features.\n", "\n", - "Typical usage includes feature cross(f1 * f2), or computing cosine similarity between two features.\n", - "\n", - "The syntax works in a similar way." + "Typical usage includes feature cross(f1 * f2), or computing cosine similarity between two features. The syntax works in a similar way." ] }, { @@ -532,7 +493,7 @@ "metadata": {}, "source": [ "### Build Features\n", - "Lastly, we need to build those features so that it can be consumed later. Note that we have to build both the \"anchor\" and the \"derived\" features." + "Lastly, we need to build these features so that they can be consumed later. Note that we have to build both the \"anchor\" and the \"derived\" features." ] }, { @@ -550,12 +511,11 @@ "metadata": {}, "source": [ "### Optional: A Special Type of Feature: Request Feature\n", - "For advanced user cases, in some cases, features defined on top of request data(a.k.a. observation data) may have no entity key or timestamp.\n", - "It is merely a function/transformation executing against request data at runtime.\n", - "For example, the day of week of the request, which is calculated by converting the request UNIX timestamp.\n", - "In this case, the `source` section should be `INPUT_CONTEXT` to indicate the source of those defined anchors.\n", + "Sometimes features defined on top of request data(a.k.a. observation data) may have no entity key or timestamp. 
It is merely a function/transformation executing against request data at runtime.\n", + "\n", + "For example, the day of the week of the request, which is calculated by converting the request UNIX timestamp. In this case, the `source` section should be `INPUT_CONTEXT` to indicate the source of those defined anchors.\n", "\n", - "We won't cover the details it in this notebook." + "We won't cover the details of it in this notebook." ] }, { @@ -564,12 +524,11 @@ "source": [ "## Create training data using point-in-time correct feature join\n", "\n", - "A training dataset usually contains entity id column(s), multiple feature columns, event timestamp column and label/target column. \n", + "A training dataset usually contains `entity id` column(s), multiple `feature` columns, event timestamp column and `label/target` column. \n", "\n", - "To create a training dataset using Feathr, we need to provide a feature join settings to specify\n", - "what features and how these features should be joined to the observation data. \n", + "To create a training dataset using Feathr, we need to provide a feature join settings to specify what features and how these features should be joined to the observation data. \n", "\n", - "(To learn more on this topic, please refer to [Point-in-time Correctness](https://github.com/feathr-ai/feathr/blob/main/docs/concepts/point-in-time-join.md))" + "(To learn more on this topic, please refer to [Point-in-time Correctness](https://feathr-ai.github.io/feathr/concepts/point-in-time-join.html))." ] }, { @@ -578,12 +537,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Synapse and Databricks have different output path format\n", - "if feathr_client.spark_runtime == 'databricks':\n", - " output_path = 'dbfs:/feathrazure_test.avro'\n", - "else:\n", - " output_path = feathr_output_path\n", - "\n", + "output_path = feathr_output_path\n", "# Features that we want to request\n", "feature_query = FeatureQuery(feature_list=[\"feature_user_age\", \n", " \"feature_user_tax_rate\", \n", @@ -606,7 +560,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Download the result and show the result\n", + "### Download the result and show the result\n", "\n", "Let's use the helper function `get_result_df` to download the result and view it:" ] @@ -639,7 +593,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Train a machine learning model\n", + "### Train a machine learning model\n", "After getting all the features, let's train a machine learning model with the converted feature by Feathr:" ] }, @@ -694,16 +648,16 @@ "\n", "In the previous section, we demonstrated how Feathr can compute feature value to generate training dataset from feature definition on-they-fly.\n", "\n", - "Now let's talk about how we can use the trained models. We can use the trained models for offline inference as well as online inference. In both cases, we need features to be feed into the models. For offline inference, you can compute and get the features on-demand; or you can store the computed features to some offline database for later offline inference.\n", + "Now let's talk about how we can use the trained models. We can use the trained models for both online and offline inference. In both cases, we need features to be fed into the models. 
For offline inference, you can compute and get the features on-demand; or you can store the computed features to some offline database for later offline inference.\n", "\n", "For online inference, we can use Feathr to compute and store the features in the online database. Then use it for online inference when the request comes.\n", "\n", - "![img](../images/online_inference.jpg)\n", + "![img](../../images/online_inference.jpg)\n", "\n", "\n", - "In this section, we will focus on materialize features to online store. For materialization to offline store, you can check out our [user guide](https://github.com/feathr-ai/feathr/blob/main/docs/concepts/materializing-features.md#materializing-features-to-offline-store).\n", + "In this section, we will focus on materialize features to online store. For materialization to offline store, you can check out our [user guide](https://feathr-ai.github.io/feathr/concepts/materializing-features.html#materializing-features-to-offline-store).\n", "\n", - "We can push the computed features to the online store like below:" + "We can push the computed features to the online store(Redis) like below:" ] }, { @@ -729,7 +683,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Fetch feature value from online store\n", + "### Fetch feature value from online store\n", "We can then get the features from the online store (Redis) via the client's `get_online_features` or `multi_get_online_features` API." ] }, @@ -761,7 +715,7 @@ "source": [ "### Registering and Fetching features\n", "\n", - "We can also register the features with an Apache Atlas compatible service, such as Azure Purview, and share the registered features across teams:" + "We can also register the features and share them across teams:" ] }, { @@ -771,13 +725,23 @@ "outputs": [], "source": [ "feathr_client.register_features()\n", - "feathr_client.list_registered_features(project_name=\"feathr_getting_started\")" + "feathr_client.list_registered_features(project_name=f\"{FEATHR_PROJECT_NAME}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "In this notebook you learnt how to set up Feathr and use it to create features, register features and use those features for model training and inferencing.\n", + "\n", + "We hope this example gave you a good sense of Feathr's capabilities and how you could leverage it within your organization's MLOps workflow." 
] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.12 ('ifelse_bug_env': venv)", + "display_name": "Python 3.8.13 ('feathrtest')", "language": "python", "name": "python3" }, @@ -791,11 +755,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.8.13" }, "vscode": { "interpreter": { - "hash": "6a6c366ec8f33a88299a9f856c1a3e4312616abcb6fcf46b22c3da0a923e63af" + "hash": "96bbbb728c64ae5eda27ed1c89d74908bf0652fd45caa45cd0ade6bdc0df4d48" } } }, diff --git a/docs/samples/product_recommendation_demo_advanced.ipynb b/docs/samples/product_recommendation_demo_advanced.ipynb index e4c5917a5..b03dccac6 100644 --- a/docs/samples/product_recommendation_demo_advanced.ipynb +++ b/docs/samples/product_recommendation_demo_advanced.ipynb @@ -116,7 +116,7 @@ }, "outputs": [], "source": [ - "resource_prefix = \"ckim2\"" + "resource_prefix = \"feathr_resource_prefix\"" ] }, { @@ -1214,7 +1214,7 @@ "widgets": {} }, "kernelspec": { - "display_name": "Python 3.9.5 ('base')", + "display_name": "Python 3.9.13 64-bit ('3.9.13')", "language": "python", "name": "python3" }, @@ -1228,11 +1228,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.13" }, "vscode": { "interpreter": { - "hash": "3d597f4c481aa0f25dceb95d2a0067e73c0966dcbd003d741d821a7208527ecf" + "hash": "c5d1b88564ea095927319e95d120a01ba9530a1c584720276480e541fd6461c7" } } }, diff --git a/feathr_project/feathr/__init__.py b/feathr_project/feathr/__init__.py index 9b0cf0a49..74809fd81 100644 --- a/feathr_project/feathr/__init__.py +++ b/feathr_project/feathr/__init__.py @@ -1,5 +1,3 @@ -import pkg_resources - from .client import FeathrClient from .spark_provider.feathr_configurations import SparkExecutionConfiguration from .definition.feature_derivations import * @@ -19,6 +17,7 @@ from .definition.settings import * from .utils.job_utils import * from .utils.feature_printer import * +from .version import __version__ # skipped class as they are internal methods: # RepoDefinitions, HoconConvertible, @@ -75,7 +74,5 @@ 'ObservationSettings', 'FeaturePrinter', 'SparkExecutionConfiguration', + __version__, ] - - -__version__ = pkg_resources.require("feathr")[0].version diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index bc1c97919..216b5f97c 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -6,6 +6,7 @@ from typing import Dict, List, Union from azure.identity import DefaultAzureCredential +from feathr.definition.transformation import WindowAggTransformation from jinja2 import Template from pyhocon import ConfigFactory import redis @@ -108,7 +109,7 @@ def __init__(self, config_path:str = "./feathr_config.yaml", local_workspace_dir self.credential = credential if self.spark_runtime not in {'azure_synapse', 'databricks', 'local'}: raise RuntimeError( - 'Only \'azure_synapse\' and \'databricks\' are currently supported.') + f'{self.spark_runtime} is not supported. Only \'azure_synapse\', \'databricks\' and \'local\' are currently supported.') elif self.spark_runtime == 'azure_synapse': # Feathr is a spark-based application so the feathr jar compiled from source code will be used in the # Spark job submission. The feathr jar hosted in cloud saves the time users needed to upload the jar from @@ -593,17 +594,31 @@ def _valid_materialize_keys(self, features: List[str], allow_empty_key=False): self.logger.error(f"Inconsistent feature keys. 
Current keys are {str(keys)}") return False return True - - def materialize_features(self, settings: MaterializationSettings, execution_configurations: Union[SparkExecutionConfiguration ,Dict[str,str]] = {}, verbose: bool = False): + + def materialize_features(self, settings: MaterializationSettings, execution_configurations: Union[SparkExecutionConfiguration ,Dict[str,str]] = {}, verbose: bool = False, allow_materialize_non_agg_feature: bool = False): """Materialize feature data Args: settings: Feature materialization settings execution_configurations: a dict that will be passed to spark job when the job starts up, i.e. the "spark configurations". Note that not all of the configuration will be honored since some of the configurations are managed by the Spark platform, such as Databricks or Azure Synapse. Refer to the [spark documentation](https://spark.apache.org/docs/latest/configuration.html) for a complete list of spark configurations. + allow_materialize_non_agg_feature: Materializing non-aggregated features (the features without WindowAggTransformation) doesn't output meaningful results so it's by default set to False, but if you really want to materialize non-aggregated features, set this to True. """ feature_list = settings.feature_names if len(feature_list) > 0 and not self._valid_materialize_keys(feature_list): raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.") + + if not allow_materialize_non_agg_feature: + # Check if there are non-aggregation features in the list + for fn in feature_list: + # Check over anchor features + for anchor in self.anchor_list: + for feature in anchor.features: + if feature.name == fn and not isinstance(feature.transform, WindowAggTransformation): + raise RuntimeError(f"Feature {fn} is not an aggregation feature. Currently Feathr only supports materializing aggregation features. If you want to materialize {fn}, please set allow_materialize_non_agg_feature to True.") + # Check over derived features + for feature in self.derived_feature_list: + if feature.name == fn and not isinstance(feature.transform, WindowAggTransformation): + raise RuntimeError(f"Feature {fn} is not an aggregation feature. Currently Feathr only supports materializing aggregation features. 
If you want to materialize {fn}, please set allow_materialize_non_agg_feature to True.") # Collect secrets from sinks secrets = [] diff --git a/feathr_project/feathr/constants.py b/feathr_project/feathr/constants.py index c4cbad7ff..b2222e2b6 100644 --- a/feathr_project/feathr/constants.py +++ b/feathr_project/feathr/constants.py @@ -28,7 +28,11 @@ TYPEDEF_ARRAY_DERIVED_FEATURE=f"array" TYPEDEF_ARRAY_ANCHOR_FEATURE=f"array" -FEATHR_MAVEN_ARTIFACT="com.linkedin.feathr:feathr_2.12:0.8.2" +# Decouple Feathr MAVEN Version from Feathr Python SDK Version +import os +from feathr.version import __version__ +FEATHR_MAVEN_VERSION = os.environ.get("FEATHR_MAVEN_VERSION", __version__) +FEATHR_MAVEN_ARTIFACT=f"com.linkedin.feathr:feathr_2.12:{FEATHR_MAVEN_VERSION}" JOIN_CLASS_NAME="com.linkedin.feathr.offline.job.FeatureJoinJob" GEN_CLASS_NAME="com.linkedin.feathr.offline.job.FeatureGenJob" \ No newline at end of file diff --git a/feathr_project/feathr/version.py b/feathr_project/feathr/version.py new file mode 100644 index 000000000..807119de6 --- /dev/null +++ b/feathr_project/feathr/version.py @@ -0,0 +1 @@ +__version__ = "0.8.0" \ No newline at end of file diff --git a/feathr_project/setup.py b/feathr_project/setup.py index 7736e5c8a..a3cc4ee78 100644 --- a/feathr_project/setup.py +++ b/feathr_project/setup.py @@ -1,3 +1,5 @@ +import sys +import os from setuptools import setup, find_packages from pathlib import Path @@ -5,9 +7,19 @@ root_path = Path(__file__).resolve().parent.parent long_description = (root_path / "docs/README.md").read_text(encoding="utf8") +try: + exec(open("feathr/version.py").read()) +except IOError: + print("Failed to load Feathr version file for packaging.", + file=sys.stderr) + sys.exit(-1) + +VERSION = __version__ # noqa +os.environ["FEATHR_VERSION"] = VERSION + setup( name='feathr', - version='0.8.2', + version=VERSION, long_description=long_description, long_description_content_type="text/markdown", author_email="feathr-technical-discuss@lists.lfaidata.foundation", @@ -37,7 +49,7 @@ "python-snappy<=0.6.1", "deltalake>=0.6.2", "graphlib_backport<=1.0.3", - "protobuf==3.*", + "protobuf<=3.19.4,>=3.0.0", "confluent-kafka<=1.9.2", "databricks-cli<=0.17.3", "avro<=1.11.1", @@ -53,6 +65,10 @@ "azure-core<=1.22.1", "typing_extensions>=4.2.0", "aws-secretsmanager-caching>=1.1.1.5", + # azure-core 1.22.1 is dependent on msrest==0.6.21, if an environment(AML) has a different version of azure-core (say 1.24.0), + # it brings a different version of msrest(0.7.0) which is incompatible with azure-core==1.22.1. Hence we need to pin it. + # See this for more details: https://github.com/Azure/azure-sdk-for-python/issues/24765 + "msrest<=0.6.21", ], tests_require=[ # TODO: This has been depricated "pytest", diff --git a/feathr_project/test/test_azure_snowflake_e2e.py b/feathr_project/test/test_azure_snowflake_e2e.py index c84aa9153..17474ab1b 100644 --- a/feathr_project/test/test_azure_snowflake_e2e.py +++ b/feathr_project/test/test_azure_snowflake_e2e.py @@ -30,7 +30,7 @@ def test_feathr_online_store_agg_features(): feature_names=['f_snowflake_call_center_division_name', 'f_snowflake_call_center_zipcode'], backfill_time=backfill_time) - client.materialize_features(settings) + client.materialize_features(settings, allow_materialize_non_agg_feature=True) # just assume the job is successful without validating the actual result in Redis. 
Might need to consolidate # this part with the test_feathr_online_store test case client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) diff --git a/feathr_project/test/test_azure_spark_e2e.py b/feathr_project/test/test_azure_spark_e2e.py index 9e452d2d0..e82e0efe9 100644 --- a/feathr_project/test/test_azure_spark_e2e.py +++ b/feathr_project/test/test_azure_spark_e2e.py @@ -153,7 +153,7 @@ def test_feathr_online_store_non_agg_features(): feature_names=["f_gen_trip_distance", "f_gen_is_long_trip_distance", "f1", "f2", "f3", "f4", "f5", "f6"], backfill_time=backfill_time) - client.materialize_features(settings) + client.materialize_features(settings, allow_materialize_non_agg_feature=True) # just assume the job is successful without validating the actual result in Redis. Might need to consolidate # this part with the test_feathr_online_store test case client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) diff --git a/feathr_project/test/test_azure_spark_maven_e2e.py b/feathr_project/test/test_azure_spark_maven_e2e.py index b8e7cefb0..6b93bb7a8 100644 --- a/feathr_project/test/test_azure_spark_maven_e2e.py +++ b/feathr_project/test/test_azure_spark_maven_e2e.py @@ -3,8 +3,12 @@ from pathlib import Path from feathr import (BackfillTime, MaterializationSettings) -from feathr import RedisSink +# from feathr import * from feathr.client import FeathrClient +from feathr.definition.dtype import ValueType +from feathr.definition.query_feature_list import FeatureQuery +from feathr.definition.settings import ObservationSettings +from feathr.definition.typed_key import TypedKey from test_fixture import (basic_test_setup, get_online_test_table_name) from test_utils.constants import Constants @@ -22,6 +26,35 @@ def test_feathr_online_store_agg_features(): # Maven package as the dependency and `noop.jar` as the main file client: FeathrClient = basic_test_setup(os.path.join(test_workspace_dir, "feathr_config_maven.yaml")) + + + location_id = TypedKey(key_column="DOLocationID", + key_column_type=ValueType.INT32, + description="location id in NYC", + full_name="nyc_taxi.location_id") + + feature_query = FeatureQuery( + feature_list=["f_location_avg_fare"], key=location_id) + settings = ObservationSettings( + observation_path="wasbs://public@azurefeathrstorage.blob.core.windows.net/sample_data/green_tripdata_2020-04.csv", + event_timestamp_column="lpep_dropoff_datetime", + timestamp_format="yyyy-MM-dd HH:mm:ss") + + now = datetime.now() + # set output folder based on different runtime + if client.spark_runtime == 'databricks': + output_path = ''.join(['dbfs:/feathrazure_cijob','_', str(now.minute), '_', str(now.second), ".avro"]) + else: + output_path = ''.join(['abfss://xchfeathrtest4fs@xchfeathrtest4sto.dfs.core.windows.net/demo_data/output','_', str(now.minute), '_', str(now.second), ".avro"]) + + + client.get_offline_features(observation_settings=settings, + feature_query=feature_query, + output_path=output_path) + + # assuming the job can successfully run; otherwise it will throw exception + client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) + return backfill_time = BackfillTime(start=datetime( 2020, 5, 20), end=datetime(2020, 5, 20), step=timedelta(days=1)) redisSink = RedisSink(table_name=online_test_table) @@ -51,4 +84,4 @@ def test_feathr_online_store_agg_features(): assert res['239'][0] != None assert res['239'][1] != None assert res['265'][0] != None - assert res['265'][1] != None \ No newline at end of file + assert res['265'][1] != 
None diff --git a/feathr_project/test/test_feature_materialization.py b/feathr_project/test/test_feature_materialization.py index edd9bb537..e8100578c 100644 --- a/feathr_project/test/test_feature_materialization.py +++ b/feathr_project/test/test_feature_materialization.py @@ -236,7 +236,7 @@ def test_delete_feature_from_redis(): "f_day_of_week" ], backfill_time=backfill_time) - client.materialize_features(settings) + client.materialize_features(settings, allow_materialize_non_agg_feature=True) client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) diff --git a/feathr_project/test/test_pyduf_preprocessing_e2e.py b/feathr_project/test/test_pyduf_preprocessing_e2e.py index 9ac9c1917..83ace12ea 100644 --- a/feathr_project/test/test_pyduf_preprocessing_e2e.py +++ b/feathr_project/test/test_pyduf_preprocessing_e2e.py @@ -103,7 +103,7 @@ def test_non_swa_feature_gen_with_offline_preprocessing(): "f_day_of_week" ], backfill_time=backfill_time) - client.materialize_features(settings) + client.materialize_features(settings, allow_materialize_non_agg_feature=True) # just assume the job is successful without validating the actual result in Redis. Might need to consolidate # this part with the test_feathr_online_store test case client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) diff --git a/feathr_project/test/test_user_workspace/feathr_config.yaml b/feathr_project/test/test_user_workspace/feathr_config.yaml index b0f2b259c..e67c803ef 100644 --- a/feathr_project/test/test_user_workspace/feathr_config.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config.yaml @@ -82,7 +82,7 @@ spark_config: # Feathr Job configuration. Support local paths, path start with http(s)://, and paths start with abfs(s):// # this is the default location so end users don't have to compile the runtime again. # feathr_runtime_location: wasbs://public@azurefeathrstorage.blob.core.windows.net/feathr-assembly-LATEST.jar - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" databricks: # workspace instance workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' @@ -93,7 +93,7 @@ spark_config: # Feathr Job location. Support local paths, path start with http(s)://, and paths start with dbfs:/ work_dir: 'dbfs:/feathr_getting_started' # this is the default location so end users don't have to compile the runtime again. 
- feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" online_store: redis: diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml index 003e424eb..f716da0b4 100644 --- a/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml @@ -25,13 +25,13 @@ spark_config: workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' executor_size: 'Small' executor_num: 1 - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" databricks: workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' workspace_token_value: '' config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} work_dir: 'dbfs:/feathr_getting_started' - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" online_store: redis: diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml index 0722a34d8..c842bc702 100644 --- a/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml @@ -25,13 +25,13 @@ spark_config: workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' executor_size: 'Small' executor_num: 1 - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" databricks: workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' workspace_token_value: '' config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} work_dir: 'dbfs:/feathr_getting_started' - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" online_store: redis: diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml index 507b096cb..dcb73d827 100644 --- a/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml @@ -25,13 +25,13 @@ spark_config: workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' executor_size: 'Small' executor_num: 1 - feathr_runtime_location: 
"../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" databricks: workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' workspace_token_value: '' config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} work_dir: 'dbfs:/feathr_getting_started' - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" online_store: redis: diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml index 9403cb190..29c6889e8 100644 --- a/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml @@ -25,13 +25,13 @@ spark_config: workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' executor_size: 'Small' executor_num: 1 - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" databricks: workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' workspace_token_value: '' config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} work_dir: 'dbfs:/feathr_getting_started' - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.2.jar" + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" online_store: redis: diff --git a/ui/package-lock.json b/ui/package-lock.json index b3a0d27d8..28bd6553c 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -13,6 +13,7 @@ "antd": "^4.20.2", "axios": "^0.27.2", "dagre": "^0.8.5", + "dayjs": "^1.11.5", "react": "^17.0.2", "react-dom": "^17.0.2", "react-flow-renderer": "^9.7.4", @@ -6353,8 +6354,9 @@ } }, "node_modules/dayjs": { - "version": "1.11.1", - "license": "MIT" + "version": "1.11.5", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.5.tgz", + "integrity": "sha512-CAdX5Q3YW3Gclyo5Vpqkgpj8fSdLQcRuzfX6mC6Phy0nfJ0eGYOeS7m4mt2plDWLAtA4TqTakvbboHvUxfe4iA==" }, "node_modules/debug": { "version": "4.3.4", @@ -20783,7 +20785,9 @@ "version": "2.28.0" }, "dayjs": { - "version": "1.11.1" + "version": "1.11.5", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.5.tgz", + "integrity": "sha512-CAdX5Q3YW3Gclyo5Vpqkgpj8fSdLQcRuzfX6mC6Phy0nfJ0eGYOeS7m4mt2plDWLAtA4TqTakvbboHvUxfe4iA==" }, "debug": { "version": "4.3.4", diff --git a/ui/package.json b/ui/package.json index 3a211df1f..dc8ee5e7e 100644 --- a/ui/package.json +++ b/ui/package.json @@ -8,6 +8,7 @@ "antd": "^4.20.2", "axios": "^0.27.2", "dagre": "^0.8.5", + "dayjs": "^1.11.5", "react": "^17.0.2", "react-dom": "^17.0.2", "react-flow-renderer": "^9.7.4", diff --git a/ui/src/pages/home/home.css b/ui/src/pages/home/home.css 
index 5c4a3b8a8..308e45367 100644 --- a/ui/src/pages/home/home.css +++ b/ui/src/pages/home/home.css @@ -1,23 +1,23 @@ -.home .ant-card { - box-shadow: 5px 8px 15px 5px rgba(208, 216, 243, 0.6); - border-radius: 8px; -} - -.home .card-meta { - display: flex; -} - -.home .card-meta .ant-card-meta-avatar { - max-width: 80px; - flex-basis: 30%; - box-sizing: border-box; -} - -.home .card-meta .ant-card-meta-avatar > span { - width: 100%; -} - -.home .card-meta .ant-card-meta-avatar svg { - width: 100%; - height: auto; -} +.home .ant-card { + box-shadow: 5px 8px 15px 5px rgba(208, 216, 243, 0.6); + border-radius: 8px; +} + +.home .card-meta { + display: flex; +} + +.home .card-meta .ant-card-meta-avatar { + max-width: 80px; + flex-basis: 30%; + box-sizing: border-box; +} + +.home .card-meta .ant-card-meta-avatar > span { + width: 100%; +} + +.home .card-meta .ant-card-meta-avatar svg { + width: 100%; + height: auto; +} diff --git a/ui/src/pages/management/components/RoleForm/index.tsx b/ui/src/pages/management/components/RoleForm/index.tsx new file mode 100644 index 000000000..9e073abd8 --- /dev/null +++ b/ui/src/pages/management/components/RoleForm/index.tsx @@ -0,0 +1,126 @@ +import React, { forwardRef, useCallback, useEffect, useState } from "react"; +import { Form, Select, Input, Button, message } from "antd"; +import { listUserRole, addUserRole } from "../../../../api"; + +export interface RoleFormProps { + getRole?: (isAdmin: boolean) => void; +} + +const { Item } = Form; +const { TextArea } = Input; + +const RoleOptions = [ + { label: "Admin", value: "admin" }, + { label: "Producer", value: "producer" }, + { label: "Consumer", value: "consumer" }, +]; + +const ValidateRule = { + scope: [{ required: true, message: "Please select scope!" }], + userName: [{ required: true, message: "Please input user name!" }], + roleName: [{ required: true, message: "Please select role name!" }], + reason: [{ required: true, message: "Please input reason!" }], +}; + +const RoleForm = (props: RoleFormProps, ref: any) => { + const [form] = Form.useForm(); + const { getRole } = props; + const [loading, setLoading] = useState(false); + + const [scopeOptions, setScopeOptions] = useState< + { label: string; value: string }[] + >([]); + + const handleFinish = useCallback( + async (values) => { + try { + setLoading(true); + await addUserRole(values); + form.resetFields(); + message.success("User role is created successfully."); + } catch { + message.error("Failed to create user role."); + } finally { + setLoading(false); + } + }, + [form] + ); + + const handleInit = useCallback(async () => { + try { + const result = await listUserRole(); + if (result.length) { + const dataset = new Set( + result.reduce( + (list: string[], item) => { + list.push(item.scope); + return list; + }, + ["global"] + ) + ); + const options = Array.from(dataset).map((item) => { + return { + label: item, + value: item, + }; + }); + setScopeOptions(options); + return true; + } else { + return false; + } + } catch { + return false; + } + }, []); + + useEffect(() => { + handleInit().then((isAdmin: boolean) => { + getRole?.(isAdmin); + }); + }, [handleInit, getRole]); + + return ( +
+ + + + +