Skip to content

Commit

Permalink
Merge branch 'main' into Divyansh-db/divyansh_dlt_pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
Divyansh-db authored Jul 29, 2024
2 parents 1f49251 + 1a309c8 commit 98cd89d
Show file tree
Hide file tree
Showing 19 changed files with 212 additions and 37 deletions.
12 changes: 6 additions & 6 deletions aws/data_aws_unity_catalog_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"regexp"
"strings"

"github.com/databricks/terraform-provider-databricks/common"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
Expand Down Expand Up @@ -44,16 +45,18 @@ func generateReadContext(ctx context.Context, d *schema.ResourceData, m *common.
},
}
if kmsKey, ok := d.GetOk("kms_name"); ok {
kmsArn := fmt.Sprintf("arn:aws:kms:%s", kmsKey)
if strings.HasPrefix(kmsKey.(string), "arn:aws") {
kmsArn = kmsKey.(string)
}
policy.Statements = append(policy.Statements, &awsIamPolicyStatement{
Effect: "Allow",
Actions: []string{
"kms:Decrypt",
"kms:Encrypt",
"kms:GenerateDataKey*",
},
Resources: []string{
fmt.Sprintf("arn:aws:kms:%s", kmsKey),
},
Resources: []string{kmsArn},
})
}
policyJSON, err := json.MarshalIndent(policy, "", " ")
Expand All @@ -73,9 +76,6 @@ func validateSchema() map[string]*schema.Schema {
"kms_name": {
Type: schema.TypeString,
Optional: true,
ValidateFunc: validation.StringMatch(
regexp.MustCompile(`^[0-9a-zA-Z/_-]+$`),
"must contain only alphanumeric, hyphens, forward slashes, and underscores characters"),
},
"bucket_name": {
Type: schema.TypeString,
Expand Down
57 changes: 57 additions & 0 deletions aws/data_aws_unity_catalog_policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,63 @@ func TestDataAwsUnityCatalogPolicy(t *testing.T) {
compareJSON(t, j, p)
}

// TestDataAwsUnityCatalogPolicyFullKms verifies that a kms_name supplied as a
// full ARN (already starting with "arn:aws") is emitted verbatim in the KMS
// statement of the generated policy, instead of being prefixed with
// "arn:aws:kms:" a second time.
func TestDataAwsUnityCatalogPolicyFullKms(t *testing.T) {
	d, err := qa.ResourceFixture{
		Read:        true,
		Resource:    DataAwsUnityCatalogPolicy(),
		NonWritable: true,
		ID:          ".",
		HCL: `
		aws_account_id = "123456789098"
		bucket_name = "databricks-bucket"
		role_name = "databricks-role"
		kms_name = "arn:aws:kms:us-west-2:111122223333:key/databricks-kms"
		`,
	}.Apply(t)
	assert.NoError(t, err)
	j := d.Get("json").(string)
	// Expected policy: S3 bucket access, sts:AssumeRole for the role, and a
	// KMS statement whose Resource is exactly the ARN passed in above.
	p := `{
		"Version": "2012-10-17",
		"Statement": [
			{
				"Effect": "Allow",
				"Action": [
					"s3:GetObject",
					"s3:PutObject",
					"s3:DeleteObject",
					"s3:ListBucket",
					"s3:GetBucketLocation"
				],
				"Resource": [
					"arn:aws:s3:::databricks-bucket/*",
					"arn:aws:s3:::databricks-bucket"
				]
			},
			{
				"Effect": "Allow",
				"Action": [
					"sts:AssumeRole"
				],
				"Resource": [
					"arn:aws:iam::123456789098:role/databricks-role"
				]
			},
			{
				"Effect": "Allow",
				"Action": [
					"kms:Decrypt",
					"kms:Encrypt",
					"kms:GenerateDataKey*"
				],
				"Resource": [
					"arn:aws:kms:us-west-2:111122223333:key/databricks-kms"
				]
			}
		]
	}`
	// compareJSON parses both documents, so comparison is structural rather
	// than byte-for-byte.
	compareJSON(t, j, p)
}

func TestDataAwsUnityCatalogPolicyWithoutKMS(t *testing.T) {
d, err := qa.ResourceFixture{
Read: true,
Expand Down
2 changes: 2 additions & 0 deletions catalog/resource_metastore_assignment.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ func ResourceMetastoreAssignment() common.Resource {
return err
}
d.Set("metastore_id", ma.MetastoreId)
d.Set("default_catalog_name", ma.DefaultCatalogName)
d.Set("workspace_id", workspaceId)
return nil
})
},
Expand Down
23 changes: 23 additions & 0 deletions catalog/resource_metastore_assignment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,29 @@ func TestMetastoreAssignment_Create(t *testing.T) {
}.ApplyNoError(t)
}

// TestMetastoreAssignment_Import verifies that reading a resource with ID
// "<workspace_id>|<metastore_id>" populates workspace_id, metastore_id and
// default_catalog_name in state from the current-metastore-assignment API.
func TestMetastoreAssignment_Import(t *testing.T) {
	httpFixtures := []qa.HTTPFixture{
		{
			Method:   "GET",
			Resource: "/api/2.1/unity-catalog/current-metastore-assignment",
			Response: catalog.MetastoreAssignment{
				MetastoreId:        "a",
				WorkspaceId:        123,
				DefaultCatalogName: "test_metastore",
			},
		},
	}
	expectedState := map[string]any{
		"workspace_id":         123,
		"metastore_id":         "a",
		"default_catalog_name": "test_metastore",
	}
	qa.ResourceFixture{
		Fixtures: httpFixtures,
		Resource: ResourceMetastoreAssignment(),
		Read:     true,
		ID:       "123|a",
	}.ApplyAndExpectData(t, expectedState)
}

func TestMetastoreAssignmentAccount_Create(t *testing.T) {
qa.ResourceFixture{
Fixtures: []qa.HTTPFixture{
Expand Down
15 changes: 11 additions & 4 deletions catalog/resource_sql_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"log"
"reflect"
"slices"
"strings"
"time"

Expand Down Expand Up @@ -35,7 +36,7 @@ type SqlTableInfo struct {
DataSourceFormat string `json:"data_source_format,omitempty" tf:"force_new"`
ColumnInfos []SqlColumnInfo `json:"columns,omitempty" tf:"alias:column,computed"`
Partitions []string `json:"partitions,omitempty" tf:"force_new"`
ClusterKeys []string `json:"cluster_keys,omitempty" tf:"force_new"`
ClusterKeys []string `json:"cluster_keys,omitempty"`
StorageLocation string `json:"storage_location,omitempty" tf:"suppress_diff"`
StorageCredentialName string `json:"storage_credential_name,omitempty" tf:"force_new"`
ViewDefinition string `json:"view_definition,omitempty"`
Expand Down Expand Up @@ -274,7 +275,7 @@ func (ti *SqlTableInfo) buildTableCreateStatement() string {
}

if len(ti.ClusterKeys) > 0 {
statements = append(statements, fmt.Sprintf("\nCLUSTER BY (%s)", strings.Join(ti.ClusterKeys, ", "))) // CLUSTER BY (university, major)
statements = append(statements, fmt.Sprintf("\nCLUSTER BY (%s)", ti.getWrappedClusterKeys())) // CLUSTER BY (`university`, `major`)
}

if ti.Comment != "" {
Expand Down Expand Up @@ -307,6 +308,11 @@ func (ci SqlColumnInfo) getWrappedColumnName() string {
return fmt.Sprintf("`%s`", ci.Name)
}

// getWrappedClusterKeys wraps every cluster key in backticks and joins them
// with commas (e.g. `university`,`major`) so that special characters in
// column names don't break the generated CLUSTER BY clause. Callers must
// ensure ClusterKeys is non-empty before using the result.
func (ti *SqlTableInfo) getWrappedClusterKeys() string {
	return "`" + strings.Join(ti.ClusterKeys, "`,`") + "`"
}

func (ti *SqlTableInfo) getStatementsForColumnDiffs(oldti *SqlTableInfo, statements []string, typestring string) []string {
if len(ti.ColumnInfos) != len(oldti.ColumnInfos) {
statements = ti.addOrRemoveColumnStatements(oldti, statements, typestring)
Expand Down Expand Up @@ -392,8 +398,9 @@ func (ti *SqlTableInfo) diff(oldti *SqlTableInfo) ([]string, error) {
if ti.StorageLocation != oldti.StorageLocation {
statements = append(statements, fmt.Sprintf("ALTER TABLE %s SET %s", ti.SQLFullName(), ti.buildLocationStatement()))
}
if !reflect.DeepEqual(ti.ClusterKeys, oldti.ClusterKeys) {
statements = append(statements, fmt.Sprintf("ALTER TABLE %s CLUSTER BY (%s)", ti.SQLFullName(), strings.Join(ti.ClusterKeys, ", ")))
equal := slices.Equal(ti.ClusterKeys, oldti.ClusterKeys)
if !equal {
statements = append(statements, fmt.Sprintf("ALTER TABLE %s CLUSTER BY (%s)", ti.SQLFullName(), ti.getWrappedClusterKeys()))
}
}

Expand Down
91 changes: 90 additions & 1 deletion catalog/resource_sql_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func TestResourceSqlTableCreateStatement_Liquid(t *testing.T) {
assert.Contains(t, stmt, "USING DELTA")
assert.Contains(t, stmt, "LOCATION 's3://ext-main/foo/bar1' WITH (CREDENTIAL `somecred`)")
assert.Contains(t, stmt, "COMMENT 'terraform managed'")
assert.Contains(t, stmt, "CLUSTER BY (baz, bazz)")
assert.Contains(t, stmt, "CLUSTER BY (`baz`,`bazz`)")
}

func TestResourceSqlTableSerializeProperties(t *testing.T) {
Expand Down Expand Up @@ -520,6 +520,95 @@ func TestResourceSqlTableUpdateTableAndOwner(t *testing.T) {
assert.Equal(t, "bar", d.Get("name"))
}

// TestResourceSqlTableUpdateTableClusterKeys verifies that adding
// cluster_keys to an existing table (absent from InstanceState, present in
// HCL) issues an ALTER TABLE ... CLUSTER BY statement with the key names
// wrapped in backticks.
func TestResourceSqlTableUpdateTableClusterKeys(t *testing.T) {
	d, err := qa.ResourceFixture{
		// The update path executes SQL through the command API; assert the
		// exact statement, including the backtick-quoted cluster key.
		CommandMock: func(commandStr string) common.CommandResults {
			assert.Equal(t, "ALTER TABLE `main`.`foo`.`bar` CLUSTER BY (`one`)", commandStr)
			return common.CommandResults{
				ResultType: "",
				Data:       nil,
			}
		},
		HCL: `
		name = "bar"
		catalog_name = "main"
		schema_name = "foo"
		table_type = "EXTERNAL"
		data_source_format = "DELTA"
		cluster_id = "gone"
		column {
			name = "one"
			type = "string"
			comment = "managed comment"
			nullable = false
		}
		column {
			name = "two"
			type = "string"
			nullable = false
		}
		cluster_keys = ["one"]
		`,
		// Prior state has no cluster_keys, so the diff against the HCL above
		// is what should trigger the CLUSTER BY statement.
		InstanceState: map[string]string{
			"name":               "bar",
			"catalog_name":       "main",
			"schema_name":        "foo",
			"table_type":         "EXTERNAL",
			"data_source_format": "DELTA",
			"column.#":           "2",
			"column.0.name":      "one",
			"column.0.type":      "string",
			"column.0.comment":   "old comment",
			"column.0.nullable":  "false",
			"column.1.name":      "two",
			"column.1.type":      "string",
			"column.1.nullable":  "false",
		},
		Fixtures: append([]qa.HTTPFixture{
			{
				Method:       "GET",
				Resource:     "/api/2.1/unity-catalog/tables/main.foo.bar",
				ReuseRequest: true,
				Response: SqlTableInfo{
					Name:                  "bar",
					CatalogName:           "main",
					SchemaName:            "foo",
					TableType:             "EXTERNAL",
					DataSourceFormat:      "DELTA",
					StorageCredentialName: "somecred",
					ColumnInfos: []SqlColumnInfo{
						{
							Name:     "one",
							Type:     "string",
							Comment:  "managed comment",
							Nullable: false,
						},
						{
							Name:     "two",
							Type:     "string",
							Nullable: false,
						},
					},
				},
			},
			// Starting cluster "gone" fails with 404; presumably this makes
			// the provider fall back to the shared createClusterForSql
			// fixtures appended below — confirm against those fixtures.
			{
				Method:   "POST",
				Resource: "/api/2.0/clusters/start",
				ExpectedRequest: clusters.ClusterID{
					ClusterID: "gone",
				},
				Status: 404,
			},
		}, createClusterForSql...),
		Resource: ResourceSqlTable(),
		ID:       "main.foo.bar",
		Update:   true,
	}.Apply(t)

	assert.NoError(t, err)
	assert.Equal(t, "bar", d.Get("name"))
}

func TestResourceSqlTableUpdateView(t *testing.T) {
d, err := qa.ResourceFixture{
CommandMock: func(commandStr string) common.CommandResults {
Expand Down
18 changes: 3 additions & 15 deletions common/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,21 +263,9 @@ func (c *DatabricksClient) ClientForHost(ctx context.Context, url string) (*Data
if err != nil {
return nil, fmt.Errorf("cannot authenticate parent client: %w", err)
}
cfg := &config.Config{
Host: url,
Username: c.Config.Username,
Password: c.Config.Password,
AuthType: c.Config.AuthType,
Token: c.Config.Token,
ClientID: c.Config.ClientID,
ClientSecret: c.Config.ClientSecret,
GoogleServiceAccount: c.Config.GoogleServiceAccount,
GoogleCredentials: c.Config.GoogleCredentials,
InsecureSkipVerify: c.Config.InsecureSkipVerify,
HTTPTimeoutSeconds: c.Config.HTTPTimeoutSeconds,
DebugTruncateBytes: c.Config.DebugTruncateBytes,
DebugHeaders: c.Config.DebugHeaders,
RateLimitPerSecond: c.Config.RateLimitPerSecond,
cfg, err := c.DatabricksClient.Config.NewWithWorkspaceHost(url)
if err != nil {
return nil, fmt.Errorf("cannot configure new client: %w", err)
}
client, err := client.New(cfg)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion docs/data-sources/aws_unity_catalog_assume_role_policy.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ data "databricks_aws_unity_catalog_policy" "this" {
aws_account_id = var.aws_account_id
bucket_name = "databricks-bucket"
role_name = "${var.prefix}-uc-access"
kms_name = "databricks-kms"
kms_name = "arn:aws:kms:us-west-2:111122223333:key/databricks-kms"
}
data "databricks_aws_unity_catalog_assume_role_policy" "this" {
Expand Down
4 changes: 2 additions & 2 deletions docs/data-sources/aws_unity_catalog_policy.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ data "databricks_aws_unity_catalog_policy" "this" {
aws_account_id = var.aws_account_id
bucket_name = "databricks-bucket"
role_name = "${var.prefix}-uc-access"
kms_name = "databricks-kms"
kms_name = "arn:aws:kms:us-west-2:111122223333:key/databricks-kms"
}
data "databricks_aws_unity_catalog_assume_role_policy" "this" {
Expand All @@ -40,7 +40,7 @@ resource "aws_iam_role" "metastore_data_access" {
* `aws_account_id` (Required) The Account ID of the current AWS account (not your Databricks account).
* `bucket_name` (Required) The name of the S3 bucket used as root storage location for [managed tables](https://docs.databricks.com/data-governance/unity-catalog/index.html#managed-table) in Unity Catalog.
* `role_name` (Required) The name of the AWS IAM role that you created in the previous step in the [official documentation](https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws).
* `kms_name` (Optional) If encryption is enabled, provide the name of the KMS key that encrypts the S3 bucket contents. If encryption is disabled, do not provide this argument.
* `kms_name` (Optional) If encryption is enabled, provide the ARN of the KMS key that encrypts the S3 bucket contents. If encryption is disabled, do not provide this argument.

## Attribute Reference

Expand Down
1 change: 1 addition & 0 deletions docs/data-sources/user.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Data source exposes the following attributes:
- `home` - Home folder of the [user](../resources/user.md), e.g. `/Users/[email protected]`.
- `repos` - Personal Repos location of the [user](../resources/user.md), e.g. `/Repos/[email protected]`.
- `alphanumeric` - Alphanumeric representation of user local name. e.g. `mr_foo`.
- `active` - Whether the [user](../resources/user.md) is active.

- `acl_principal_id` - identifier for use in [databricks_access_control_rule_set](../resources/access_control_rule_set.md), e.g. `users/[email protected]`.

Expand Down
2 changes: 1 addition & 1 deletion docs/guides/unity-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ resource "aws_iam_policy" "external_data_access" {
resource "aws_iam_role" "external_data_access" {
name = local.uc_iam_role
assume_role_policy = data.aws_iam_policy_document.passrole_for_uc.json
assume_role_policy = data.aws_iam_policy_document.this.json
managed_policy_arns = [aws_iam_policy.external_data_access.arn]
tags = merge(var.tags, {
Name = "${local.prefix}-unity-catalog external access IAM role"
Expand Down
3 changes: 1 addition & 2 deletions docs/resources/cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ library {

Installing Python libraries listed in the `requirements.txt` file. Only Workspace paths and Unity Catalog Volumes paths are supported. Requires a cluster with DBR 15.0+.


```hcl
library {
requirements = "/Workspace/path/to/requirements.txt"
Expand Down Expand Up @@ -414,7 +413,7 @@ The following options are [available](https://docs.microsoft.com/en-us/azure/dat

* `availability` - (Optional) Availability type used for all subsequent nodes past the `first_on_demand` ones. Valid values are `SPOT_AZURE`, `SPOT_WITH_FALLBACK_AZURE`, and `ON_DEMAND_AZURE`. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster.
* `first_on_demand` - (Optional) The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
* `spot_bid_max_price` - (Optional) The max price for Azure spot instances. Use `-1` to specify the lowest price.
* `spot_bid_max_price` - (Optional) The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to `-1`, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.

### gcp_attributes

Expand Down
2 changes: 1 addition & 1 deletion docs/resources/instance_pool.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ The following options are [available](https://docs.databricks.com/dev-tools/api/
The following options are [available](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/clusters#--azureattributes):

* `availability` - (Optional) Availability type used for all nodes. Valid values are `SPOT_AZURE` and `ON_DEMAND_AZURE`.
* `spot_bid_max_price` - (Optional) The max price for Azure spot instances. Use `-1` to specify the lowest price.
* `spot_bid_max_price` - (Optional) The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to `-1`, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.

### gcp_attributes Configuration Block

Expand Down
Loading

0 comments on commit 98cd89d

Please sign in to comment.