diff --git a/LICENSE.bin b/LICENSE.bin index 34723024e78..effaa4ac4a2 100644 --- a/LICENSE.bin +++ b/LICENSE.bin @@ -304,6 +304,7 @@ Apache Iceberg Aliyun Apache Iceberg api Apache Iceberg AWS + Apache Iceberg Azure Apache Iceberg core Apache Iceberg Hive metastore Apache Iceberg GCP diff --git a/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java index 375edd600fb..9e1c184cad9 100644 --- a/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java @@ -33,6 +33,7 @@ import org.apache.gravitino.iceberg.common.IcebergCatalogBackend; import org.apache.gravitino.iceberg.common.authentication.AuthenticationConfig; import org.apache.gravitino.iceberg.common.authentication.kerberos.KerberosConfig; +import org.apache.gravitino.storage.AzureProperties; import org.apache.gravitino.storage.OSSProperties; import org.apache.gravitino.storage.S3Properties; @@ -91,25 +92,37 @@ public class IcebergCatalogPropertiesMetadata extends BaseCatalogPropertiesMetad "s3 access key ID", false /* immutable */, null /* defaultValue */, - true /* hidden */), + false /* hidden */), stringOptionalPropertyEntry( S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, "s3 secret access key", false /* immutable */, null /* defaultValue */, - true /* hidden */), + false /* hidden */), stringOptionalPropertyEntry( OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, "OSS access key ID", false /* immutable */, null /* defaultValue */, - true /* hidden */), + false /* hidden */), stringOptionalPropertyEntry( OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, "OSS access key secret", false /* immutable 
*/, null /* defaultValue */, - true /* hidden */)); + false /* hidden */), + stringOptionalPropertyEntry( + AzureProperties.GRAVITINO_AZURE_STORAGE_ACCOUNT_NAME, + "Azure storage account name", + false /* immutable */, + null /* defaultValue */, + false /* hidden */), + stringOptionalPropertyEntry( + AzureProperties.GRAVITINO_AZURE_STORAGE_ACCOUNT_KEY, + "Azure storage account key", + false /* immutable */, + null /* defaultValue */, + false /* hidden */)); HashMap> result = Maps.newHashMap(); result.putAll(Maps.uniqueIndex(propertyEntries, PropertyEntry::getName)); result.putAll(KerberosConfig.KERBEROS_PROPERTY_ENTRIES); diff --git a/docs/lakehouse-iceberg-catalog.md b/docs/lakehouse-iceberg-catalog.md index 393ef26b8cf..6ad011d7160 100644 --- a/docs/lakehouse-iceberg-catalog.md +++ b/docs/lakehouse-iceberg-catalog.md @@ -28,10 +28,7 @@ Builds with Apache Iceberg `1.5.2`. The Apache Iceberg table format version is ` - Works as a catalog proxy, supporting `Hive`, `JDBC` and `REST` as catalog backend. - Supports DDL operations for Iceberg schemas and tables. - Doesn't support snapshot or table management operations. -- Supports multi storage. - - S3 - - HDFS - - OSS +- Supports multi storage, including S3, GCS, ADLS, OSS and HDFS. - Supports Kerberos or simple authentication for Iceberg catalog with Hive backend. ### Catalog properties @@ -119,6 +116,22 @@ Please make sure the credential file is accessible by Gravitino, like using `exp Please set `warehouse` to `gs://{bucket_name}/${prefix_name}`, and download [Iceberg GCP bundle jar](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-gcp-bundle) and place it to `catalogs/lakehouse-iceberg/libs/`. ::: +#### ADLS + +Supports using Azure account name and secret key to access ADLS data. 
+ +| Configuration item | Description | Default value | Required | Since Version | +|------------------------------|-----------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.azure.adlsv2.ADLSFileIO` for ADLS. | (none) | No | 0.6.0-incubating | +| `azure-storage-account-name` | The static storage account name used to access ADLS data. | (none) | No | 0.8.0-incubating | +| `azure-storage-account-key` | The static storage account key used to access ADLS data. | (none) | No | 0.8.0-incubating | + +For other Iceberg ADLS properties not managed by Gravitino like `adls.read.block-size-bytes`, you could config it directly by `gravitino.iceberg-rest.adls.read.block-size-bytes`. + +:::info +Please set `warehouse` to `abfs[s]://{container-name}@{storage-account-name}.dfs.core.windows.net/{path}`, and download the [Iceberg Azure bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-azure-bundle) and place it to `catalogs/lakehouse-iceberg/libs/`. +::: + #### Other storages For other storages that are not managed by Gravitino directly, you can manage them through custom catalog properties. diff --git a/docs/spark-connector/spark-catalog-iceberg.md b/docs/spark-connector/spark-catalog-iceberg.md index e4933a3036f..28f2b55c7e6 100644 --- a/docs/spark-connector/spark-catalog-iceberg.md +++ b/docs/spark-connector/spark-catalog-iceberg.md @@ -111,7 +111,13 @@ Gravitino spark connector will transform below property names which are defined | `io-impl` | `io-impl` | The io implementation for `FileIO` in Iceberg. | 0.6.0-incubating | | `s3-endpoint` | `s3.endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. 
| 0.6.0-incubating | | `s3-region` | `client.region` | The region of the S3 service, like `us-west-2`. | 0.6.0-incubating | +| `s3-access-key-id` | `s3.access-key-id` | The static access key ID used to access S3 data. | 0.8.0-incubating | +| `s3-secret-access-key` | `s3.secret-access-key` | The static secret access key used to access S3 data. | 0.8.0-incubating | | `oss-endpoint` | `oss.endpoint` | The endpoint of Aliyun OSS service. | 0.7.0-incubating | +| `oss-access-key-id` | `client.access-key-id` | The static access key ID used to access OSS data. | 0.8.0-incubating | +| `oss-secret-access-key` | `client.access-key-secret` | The static secret access key used to access OSS data. | 0.8.0-incubating | +| `azure-storage-account-name` | `adls.auth.shared-key.account.name` | The static storage account name used to access ADLS data. | 0.8.0-incubating | +| `azure-storage-account-key` | `adls.auth.shared-key.account.key` | The static storage account key used to access ADLS data. | 0.8.0-incubating | Gravitino catalog property names with the prefix `spark.bypass.` are passed to Spark Iceberg connector. For example, using `spark.bypass.clients` to pass the `clients` to the Spark Iceberg connector. @@ -121,17 +127,23 @@ Iceberg catalog property `cache-enabled` is setting to `false` internally and no ## Storage +Spark connector can convert storage properties in the Gravitino catalog to Spark Iceberg connector automatically; no extra configuration is needed for `S3`, `ADLS`, `OSS`, `GCS`. ### S3 -You need to add s3 secret to the Spark configuration using `spark.sql.catalog.${iceberg_catalog_name}.s3.access-key-id` and `spark.sql.catalog.${iceberg_catalog_name}.s3.secret-access-key`. Additionally, download the [Iceberg AWS bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle) and place it in the classpath of Spark. 
+Please download the [Iceberg AWS bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle) and place it in the classpath of Spark. ### OSS -You need to add OSS secret key to the Spark configuration using `spark.sql.catalog.${iceberg_catalog_name}.client.access-key-id` and `spark.sql.catalog.${iceberg_catalog_name}.client.access-key-secret`. Additionally, download the [Aliyun OSS SDK](https://gosspublic.alicdn.com/sdks/java/aliyun_java_sdk_3.10.2.zip) and copy `aliyun-sdk-oss-3.10.2.jar`, `hamcrest-core-1.1.jar`, `jdom2-2.0.6.jar` in the classpath of Spark. +Please download the [Aliyun OSS SDK](https://gosspublic.alicdn.com/sdks/java/aliyun_java_sdk_3.10.2.zip) and copy `aliyun-sdk-oss-3.10.2.jar`, `hamcrest-core-1.1.jar`, `jdom2-2.0.6.jar` in the classpath of Spark. ### GCS -No extra configuration is needed. Please make sure the credential file is accessible by Spark, like using `export GOOGLE_APPLICATION_CREDENTIALS=/xx/application_default_credentials.json`, and download [Iceberg GCP bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-gcp-bundle) and place it to the classpath of Spark. +Please make sure the credential file is accessible by Spark, like using `export GOOGLE_APPLICATION_CREDENTIALS=/xx/application_default_credentials.json`, and download [Iceberg GCP bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-gcp-bundle) and place it to the classpath of Spark. ### ADLS +Please download the [Iceberg Azure bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-azure-bundle) and place it in the classpath of Spark. ### Other storage