Skip to content

Commit

Permalink
Merge pull request #40 from databricks/adb-adding-uc-with-data-exfil
Browse files Browse the repository at this point in the history
Adding Unity Catalog + Data Exfil Example
  • Loading branch information
nathanknox authored Feb 16, 2023
2 parents 614a9ba + db883f4 commit d1e22f8
Show file tree
Hide file tree
Showing 16 changed files with 393 additions and 13 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 57 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
page_title: "Provisioning Azure Databricks Hub and Spoke Deployment as per Data Exfiltration Protection and Unity Catalog with Terraform"
---

# Provisioning Azure Databricks Hub and Spoke Deployment as per Data Exfiltration Protection and Unity Catalog with Terraform

[Reference documentation and blog](https://databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html)
This Terraform configuration is an implementation of the above blog post.
Note: the firewall rules deviate slightly in that outbound traffic from the firewall is allowed to Databricks resources instead of specifying Databricks worker subnets.
This is to simplify outbound routing in the event that multiple `spoke`s are desired.

This guide is provided as-is; you can use it as the basis for your own custom Terraform module.

It uses the following variables in configurations:

## Required

- `project_name`: (Required) The name of the project associated with the infrastructure to be managed by Terraform
- `location`: (Required) The location for the resources in this module
- `databricks_workspace_name`: (Required) The name of the Azure Databricks Workspace to deploy in the spoke vnet

Note: the Azure Firewall is created by this configuration, so `firewall_name` and `firewall_private_ip` are not inputs. The firewall's name is exposed as the `firewall_name` output, and its private IP address is passed to the spoke VNet module automatically.

## Optional

- `privatelink_subnet_address_prefixes`: (Optional) The address prefix(es) for the PrivateLink subnet
- `hub_resource_group_name`: (Optional) The name of the hub Resource Group to create; it contains the hub Virtual Network
- `hub_vnet_name`: (Optional) The name of the hub Virtual Network to create
- `hub_vnet_address_space`: (Optional) The address space for the hub Virtual Network
- `spoke_resource_group_name`: (Optional) The name of the Resource Group to create
- `spoke_vnet_address_space`: (Optional) The address space for the spoke Virtual Network
- `private_subnet_address_prefixes`: (Optional) The address prefix(es) for the Databricks private subnet
- `public_subnet_address_prefixes`: (Optional) The address prefix(es) for the Databricks public subnet
- `firewall_subnet_address_prefixes`: (Optional) The address prefix (a single CIDR block) for the Azure firewall subnet
- `public_repos`: (Optional) List of public repository FQDNs (wildcards such as `*.pypi.org` are supported) to allow access to.
- `tags`: (Optional) Map of tags to attach to resources

## Provider initialization

```hcl
terraform {
  required_providers {
    # Databricks provider: release 1.9.2 or any later release.
    databricks = {
      version = ">=1.9.2"
      source  = "databricks/databricks"
    }
    # AzureRM provider: pinned to the 3.43.x patch series.
    azurerm = {
      version = "~>3.43.0"
      source  = "hashicorp/azurerm"
    }
  }
}
```
93 changes: 93 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/firewall.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Subnet inside the hub virtual network that hosts the Azure Firewall.
resource "azurerm_subnet" "firewall" {
  # Azure requires the firewall's subnet to carry exactly this name.
  name                 = "AzureFirewallSubnet"
  virtual_network_name = azurerm_virtual_network.this.name
  resource_group_name  = azurerm_resource_group.this.name

  # The variable is a single CIDR string, so it is wrapped in a list here.
  address_prefixes = [var.firewall_subnet_address_prefixes]

  service_endpoints = ["Microsoft.Storage", "Microsoft.AzureActiveDirectory"]
}

# Static, Standard-SKU public IP attached to the firewall's IP configuration.
resource "azurerm_public_ip" "this" {
  name                = "firewall-public-ip"
  resource_group_name = azurerm_resource_group.this.name
  location            = azurerm_resource_group.this.location

  sku               = "Standard"
  allocation_method = "Static"
}

# Firewall policy that the hub Azure Firewall is bound to; the rule collection
# group in this file attaches its rules to this policy.
resource "azurerm_firewall_policy" "this" {
  name     = "databricks-fwpolicy"
  location = azurerm_resource_group.this.location

  # Reference the resource group resource rather than the raw variable. The
  # value is the same name, but the reference makes Terraform create the
  # resource group before this policy (and destroy it after).
  resource_group_name = azurerm_resource_group.this.name
}

# Application rules attached to the Databricks firewall policy.
#
# The dependency on the policy is implied by `firewall_policy_id`, so no
# explicit `depends_on` is needed.
resource "azurerm_firewall_policy_rule_collection_group" "this" {
  name               = "databricks-fwpolicy-rcg"
  firewall_policy_id = azurerm_firewall_policy.this.id
  priority           = 200

  application_rule_collection {
    name     = "databricks-app-rc"
    priority = 200
    action   = "Allow"

    # Package repositories listed in var.public_repos, over HTTPS and HTTP,
    # from any source address.
    rule {
      name              = "public-repos"
      source_addresses  = ["*"]
      destination_fqdns = var.public_repos

      protocols {
        port = "443"
        type = "Https"
      }
      protocols {
        port = "80"
        type = "Http"
      }
    }

    # ipinfo.io and its subdomains, over HTTPS (443) and HTTP (8080 and 80).
    rule {
      name              = "IPinfo"
      source_addresses  = ["*"]
      destination_fqdns = ["*.ipinfo.io", "ipinfo.io"]

      protocols {
        port = "443"
        type = "Https"
      }
      protocols {
        port = "8080"
        type = "Http"
      }
      protocols {
        port = "80"
        type = "Http"
      }
    }
  }
}

# Hub Azure Firewall (Standard tier, VNet SKU) governed by the Databricks
# firewall policy and named after the hub virtual network.
resource "azurerm_firewall" "this" {
  name                = "${azurerm_virtual_network.this.name}-firewall"
  resource_group_name = azurerm_resource_group.this.name
  location            = azurerm_resource_group.this.location

  sku_tier           = "Standard"
  sku_name           = "AZFW_VNet"
  firewall_policy_id = azurerm_firewall_policy.this.id

  ip_configuration {
    name                 = "firewall-public-ip-config"
    public_ip_address_id = azurerm_public_ip.this.id
    subnet_id            = azurerm_subnet.firewall.id
  }

  # No attribute of the rule collection group is referenced above, so the
  # ordering relative to it has to be declared explicitly.
  depends_on = [azurerm_firewall_policy_rule_collection_group.this]
}
52 changes: 52 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Hub resource group; the hub virtual network, firewall and public IP in this
# configuration are placed in it.
resource "azurerm_resource_group" "this" {
  location = var.location
  name     = var.hub_resource_group_name
}

# Hub virtual network, created in the hub resource group's location.
resource "azurerm_virtual_network" "this" {
  name                = var.hub_vnet_name
  resource_group_name = azurerm_resource_group.this.name
  location            = azurerm_resource_group.this.location

  # The variable is a single CIDR string, so it is wrapped in a list here.
  address_space = [var.hub_vnet_address_space]
}

# Spoke network module. It receives the hub network's names and the private IP
# of the firewall created in this configuration.
module "spoke_vnet" {
  # TODO: Get rid of redundant variables - source them from `id`s or something
  # TODO: Add Routes for service tags to the route table
  source = "../../modules/azure_spoke_vnet"

  project_name = var.project_name
  location     = azurerm_virtual_network.this.location
  tags         = var.tags

  # Hub-side inputs
  hub_resource_group_name = azurerm_resource_group.this.name
  hub_vnet_name           = azurerm_virtual_network.this.name
  firewall_private_ip     = azurerm_firewall.this.ip_configuration[0].private_ip_address

  # Spoke-side inputs
  spoke_resource_group_name           = var.spoke_resource_group_name
  spoke_vnet_address_space            = var.spoke_vnet_address_space
  privatelink_subnet_address_prefixes = var.privatelink_subnet_address_prefixes

  depends_on = [azurerm_resource_group.this]
}

# Databricks workspace module, fed with the resource group, VNet, NSG and
# route table produced by the spoke_vnet module.
module "spoke_databricks_workspace" {
  source = "../../modules/azure_vnet_injected_databricks_workspace"

  workspace_name = var.databricks_workspace_name
  location       = azurerm_virtual_network.this.location
  tags           = var.tags

  # Outputs of the spoke network module
  databricks_resource_group_name = module.spoke_vnet.rg_name
  vnet_name                      = module.spoke_vnet.vnet_name
  vnet_id                        = module.spoke_vnet.vnet_id
  route_table_id                 = module.spoke_vnet.route_table_id
  nsg_id                         = module.spoke_vnet.nsg_id

  # Address ranges for the two Databricks subnets
  public_subnet_address_prefixes  = var.public_subnet_address_prefixes
  private_subnet_address_prefixes = var.private_subnet_address_prefixes
}


# Unity Catalog module, given the hub resource group's ID and the ID of the
# spoke Databricks workspace as the single workspace to associate.
module "unity_catalog" {
  source = "../../modules/azure_uc"

  workspaces_to_associate = [module.spoke_databricks_workspace.databricks_workspace_id]
  resource_group_id       = azurerm_resource_group.this.id
}
15 changes: 15 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
output "resource_group_name" {
  description = "Name of the hub resource group created by this configuration"
  value       = azurerm_resource_group.this.name
}

output "virtual_network_name" {
  description = "Name of the hub virtual network created by this configuration"
  value       = azurerm_virtual_network.this.name
}

output "firewall_name" {
  description = "Name of the Azure Firewall created by this configuration"
  value       = azurerm_firewall.this.name
}

output "workspace_url" {
  description = "Workspace URL reported by the spoke_databricks_workspace module"
  value       = module.spoke_databricks_workspace.workspace_url
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# AzureRM provider with default feature settings (the `features` block is left
# empty).
provider "azurerm" {
  features {}
}

# Databricks provider pointed at the workspace produced by the
# spoke_databricks_workspace module.
provider "databricks" {
  host = module.spoke_databricks_workspace.workspace_url
}
80 changes: 80 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Required input (no default); forwarded to the spoke_vnet module.
variable "project_name" {
  description = "(Required) The name of the project associated with the infrastructure to be managed by Terraform"
  type        = string
}

# Required input (no default); used as the hub resource group's location.
variable "location" {
  description = "(Required) The location for the resources in this module"
  type        = string
}

# Name given to the hub resource group that this configuration creates.
variable "hub_resource_group_name" {
  description = "(Optional) The name for the hub Resource Group"
  type        = string
  default     = "hub-rg"
}

# Name given to the hub virtual network that this configuration creates.
variable "hub_vnet_name" {
  description = "(Optional) The name for the hub Virtual Network"
  type        = string
  default     = "hub-vnet"
}

# Single CIDR string; wrapped in a list where the hub virtual network is
# declared.
variable "hub_vnet_address_space" {
  description = "(Optional) The address space for the hub Virtual Network"
  type        = string
  default     = "10.3.1.0/24"
}

# Forwarded to the spoke_vnet module.
variable "spoke_resource_group_name" {
  description = "(Optional) The name of the Resource Group to create"
  type        = string
  default     = "spoke-rg"
}

# Single CIDR string, forwarded to the spoke_vnet module.
variable "spoke_vnet_address_space" {
  description = "(Optional) The address space for the spoke Virtual Network"
  type        = string
  default     = "10.2.1.0/24"
}

# Required input (no default); forwarded to the spoke_databricks_workspace
# module as `workspace_name`.
variable "databricks_workspace_name" {
  description = "(Required) The name of the Azure Databricks Workspace to deploy"
  type        = string
}

# List of CIDR strings, forwarded to the spoke_vnet module.
variable "privatelink_subnet_address_prefixes" {
  description = "(Optional) The address prefix(es) for the PrivateLink subnet"
  type        = list(string)
  default     = ["10.2.1.0/26"]
}

# List of CIDR strings, forwarded to the spoke_databricks_workspace module.
variable "private_subnet_address_prefixes" {
  description = "(Optional) The address prefix(es) for the Databricks private subnet"
  type        = list(string)
  default     = ["10.2.1.128/26"]
}

# List of CIDR strings, forwarded to the spoke_databricks_workspace module.
variable "public_subnet_address_prefixes" {
  description = "(Optional) The address prefix(es) for the Databricks public subnet"
  type        = list(string)
  default     = ["10.2.1.64/26"]
}

# CIDR of the hub's firewall subnet. Despite the plural name this is a single
# string; the firewall subnet resource wraps it in a one-element list for
# `address_prefixes`.
variable "firewall_subnet_address_prefixes" {
  type        = string
  description = "(Optional) The address prefix (a single CIDR block) for the Azure firewall subnet"
  default     = "10.3.1.0/26"
}

# Package-repository hosts allowed through the firewall. The values are used as
# `destination_fqdns` in the "public-repos" application rule, so they must be
# FQDNs (wildcards allowed), not IP addresses.
variable "public_repos" {
  type        = list(string)
  description = "(Optional) List of public repository FQDNs to allow access to."
  default     = ["python.org", "*.python.org", "pypi.org", "*.pypi.org", "pythonhosted.org", "*.pythonhosted.org", "cran.r-project.org", "*.cran.r-project.org", "r-project.org"]
}

# Forwarded to the spoke_vnet and spoke_databricks_workspace modules.
variable "tags" {
  description = "(Optional) Map of tags to attach to resources"
  type        = map(string)
  default     = {}
}
14 changes: 14 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Provider requirements for this example.
terraform {
  required_providers {
    # Databricks provider: release 1.9.2 or any later release.
    databricks = {
      version = ">=1.9.2"
      source  = "databricks/databricks"
    }

    # AzureRM provider: pinned to the 3.43.x patch series.
    azurerm = {
      version = "~>3.43.0"
      source  = "hashicorp/azurerm"
    }
  }
}
Loading

0 comments on commit d1e22f8

Please sign in to comment.