Skip to content

Commit

Permalink
Merge pull request #38 from databricks/adb-exfil
Browse files Browse the repository at this point in the history
Adb exfil
  • Loading branch information
nathanknox authored Mar 21, 2023
2 parents 97e1c03 + a5a40b6 commit 9697f60
Show file tree
Hide file tree
Showing 23 changed files with 466 additions and 119 deletions.
48 changes: 0 additions & 48 deletions examples/test_azure_data_exfiltration_protection/firewall.tf
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
locals {
title_cased_location = title(var.location)
service_tags = {
"databricks" : { "tag" : "AzureDatabricks", "port" : "443" },
"sql" : { "tag" : "Sql.${local.title_cased_location}", "port" : "3306" },
"storage" : { "tag" : "Storage.${local.title_cased_location}", "port" : "443" },
"eventhub" : { "tag" : "EventHub.${local.title_cased_location}", "port" : "9093" }
}
}

resource "azurerm_subnet" "firewall" {
name = "AzureFirewallSubnet"
resource_group_name = azurerm_resource_group.this.name
Expand Down Expand Up @@ -74,44 +64,6 @@ resource "azurerm_firewall_policy_rule_collection_group" "this" {
type = "Http"
}
}

rule {
name = "ganglia-ui"
source_addresses = ["*"]
destination_fqdns = ["cdnjs.com", "cdnjs.cloudflare.com"]
protocols {
port = "443"
type = "Https"
}
}
}

network_rule_collection {
name = "databricks-network-rc"
priority = 100
action = "Allow"

dynamic "rule" {
for_each = var.webapp_and_infra_routes
content {
name = rule.key
source_addresses = ["*"]
destination_ports = ["443"]
destination_addresses = [rule.value]
protocols = ["TCP"]
}
}

dynamic "rule" {
for_each = local.service_tags
content {
name = rule.key
source_addresses = ["*"]
destination_addresses = [rule.value.tag]
destination_ports = [rule.value.port]
protocols = ["TCP"]
}
}
}
}

Expand Down
7 changes: 2 additions & 5 deletions examples/test_azure_data_exfiltration_protection/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,17 @@ resource "azurerm_virtual_network" "this" {
}

module "spoke_vnet" {
# TODO: Get rid of redundant variables - source them from `id`s or something
# TODO: Add Routes for service tags to the route table
source = "../../modules/azure_spoke_vnet"
project_name = var.project_name
location = azurerm_virtual_network.this.location
hub_vnet_name = azurerm_virtual_network.this.name
hub_vnet_id = azurerm_virtual_network.this.id
hub_resource_group_name = azurerm_resource_group.this.name
firewall_name = azurerm_firewall.this.name
firewall_private_ip = azurerm_firewall.this.ip_configuration[0].private_ip_address
spoke_vnet_address_space = var.spoke_vnet_address_space
spoke_resource_group_name = var.spoke_resource_group_name
scc_relay_address_prefixes = var.scc_relay_address_prefixes
privatelink_subnet_address_prefixes = var.privatelink_subnet_address_prefixes
webapp_and_infra_routes = var.webapp_and_infra_routes
public_repos = var.public_repos
tags = var.tags
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
page_title: "Provisioning Azure Databricks Hub and Spoke Deployment as per Data Exfiltration Protection and Unity Catalog with Terraform"
---

# Provisioning Azure Databricks Hub and Spoke Deployment as per Data Exfiltration Protection and Unity Catalog with Terraform

[Reference documentation and blog](https://databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html)
This Terraform configuration is an implementation of the above blog post.
Note: the firewall rules deviate slightly in that outbound traffic from the firewall is allowed to Databricks resources instead of specifying Databricks worker subnets.
This is to simplify outbound routing in the event that multiple `spoke`s are desired.

Additionally, note that Unity Catalog has been included in this template in order to help modernize the architecture to current standards.

This guide is provided as-is; you can use it as the basis for your own custom Terraform module.

The configuration uses the following variables:

## Required

- `project_name`: (Required) The name of the project associated with the infrastructure to be managed by Terraform
- `location`: (Required) The location for the resources in this module
- `databricks_workspace_name`: (Required) The name of the Azure Databricks Workspace to deploy in the spoke vnet
- `privatelink_subnet_address_prefixes`: (Required) The address prefix(es) for the PrivateLink subnet
- `firewall_name`: (Required) The name of the Azure Firewall deployed in your hub Virtual Network
- `firewall_private_ip`: (Required) The hub firewall's private IP address


## Optional

- `hub_resource_group_name`: (Optional) The name of the existing Resource Group containing the hub Virtual Network
- `hub_vnet_name`: (Optional) The name of the existing hub Virtual Network
- `hub_vnet_address_space`: (Optional) The address space for the hub Virtual Network
- `spoke_resource_group_name`: (Optional) The name of the Resource Group to create
- `spoke_vnet_address_space`: (Optional) The address space for the spoke Virtual Network
- `private_subnet_address_prefixes`: (Optional) The address prefix(es) for the Databricks private subnet
- `public_subnet_address_prefixes`: (Optional) The address prefix(es) for the Databricks public subnet
- `firewall_subnet_address_prefixes`: (Optional) The address prefixes for the Azure firewall subnet
- `public_repos`: (Optional) List of public repository IP addresses to allow access to.
- `tags`: (Optional) Map of tags to attach to resources

## Provider initialization

```hcl
terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "~>3.43.0"
}
databricks = {
source = "databricks/databricks"
version = ">=1.9.2"
}
}
}
```
93 changes: 93 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/firewall.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Subnet that hosts the hub firewall. Azure Firewall expects its subnet to be
# named "AzureFirewallSubnet".
resource "azurerm_subnet" "firewall" {
  name                 = "AzureFirewallSubnet"
  virtual_network_name = azurerm_virtual_network.this.name
  resource_group_name  = azurerm_resource_group.this.name

  # NOTE(review): despite the plural name, the variable is wrapped in a list
  # here, so it is presumably a single CIDR string - confirm in variables.tf.
  address_prefixes = [var.firewall_subnet_address_prefixes]

  # Service endpoints enabled on this subnet.
  service_endpoints = ["Microsoft.Storage", "Microsoft.AzureActiveDirectory"]
}

# Public IP address used by the hub firewall's IP configuration.
resource "azurerm_public_ip" "this" {
  name                = "firewall-public-ip"
  resource_group_name = azurerm_resource_group.this.name
  location            = azurerm_resource_group.this.location

  # Standard SKU with a statically allocated address.
  sku               = "Standard"
  allocation_method = "Static"
}

# Firewall policy that the hub firewall and the Databricks rule collection
# group attach to.
resource "azurerm_firewall_policy" "this" {
  name     = "databricks-fwpolicy"
  location = azurerm_resource_group.this.location

  # Reference the managed resource group instead of the raw input variable:
  # the value is the same today (the group is named from
  # var.hub_resource_group_name), but the reference keeps this resource in
  # step with the group if its naming ever changes and makes the dependency
  # explicit, matching the other hub resources in this configuration.
  resource_group_name = azurerm_resource_group.this.name
}

# Rule collection group holding the application (FQDN-based) allow rules of
# the Databricks firewall policy.
#
# No explicit depends_on is needed: firewall_policy_id references
# azurerm_firewall_policy.this, which already orders this resource after the
# policy.
resource "azurerm_firewall_policy_rule_collection_group" "this" {
  name               = "databricks-fwpolicy-rcg"
  firewall_policy_id = azurerm_firewall_policy.this.id
  priority           = 200

  application_rule_collection {
    name     = "databricks-app-rc"
    priority = 200
    action   = "Allow"

    # Outbound access to the FQDNs supplied in var.public_repos, from any
    # source, over HTTPS (443) and HTTP (80).
    rule {
      name              = "public-repos"
      source_addresses  = ["*"]
      destination_fqdns = var.public_repos
      protocols {
        port = "443"
        type = "Https"
      }
      protocols {
        port = "80"
        type = "Http"
      }
    }

    # Outbound access to ipinfo.io and its subdomains, from any source, over
    # HTTPS (443) and HTTP (8080 and 80).
    rule {
      name              = "IPinfo"
      source_addresses  = ["*"]
      destination_fqdns = ["*.ipinfo.io", "ipinfo.io"]
      protocols {
        port = "443"
        type = "Https"
      }
      protocols {
        port = "8080"
        type = "Http"
      }
      protocols {
        port = "80"
        type = "Http"
      }
    }
  }
}

# Hub Azure Firewall. Its name is derived from the hub virtual network name,
# and it is governed by the Databricks firewall policy.
resource "azurerm_firewall" "this" {
  name                = "${azurerm_virtual_network.this.name}-firewall"
  resource_group_name = azurerm_resource_group.this.name
  location            = azurerm_resource_group.this.location
  firewall_policy_id  = azurerm_firewall_policy.this.id
  sku_tier            = "Standard"
  sku_name            = "AZFW_VNet"

  # Places the firewall in the firewall subnet and binds the public IP to it.
  ip_configuration {
    name                 = "firewall-public-ip-config"
    public_ip_address_id = azurerm_public_ip.this.id
    subnet_id            = azurerm_subnet.firewall.id
  }

  # Explicit ordering: no argument above references the rule collection group,
  # so this is what forces it to be created before the firewall.
  depends_on = [
    resource.azurerm_firewall_policy_rule_collection_group.this
  ]
}
50 changes: 50 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Hub resource group; its name and region both come from input variables.
resource "azurerm_resource_group" "this" {
  location = var.location
  name     = var.hub_resource_group_name
}

# Hub virtual network, created in the hub resource group and in its region.
resource "azurerm_virtual_network" "this" {
  name                = var.hub_vnet_name
  resource_group_name = azurerm_resource_group.this.name
  location            = azurerm_resource_group.this.location

  # The single address-space variable is wrapped into the list this argument
  # takes.
  address_space = [var.hub_vnet_address_space]
}

# Spoke network module, wired to the hub resources defined in this
# configuration.
module "spoke_vnet" {
  source = "../../modules/azure_spoke_vnet"

  project_name = var.project_name
  location     = azurerm_virtual_network.this.location
  tags         = var.tags

  # Hub-side inputs, taken from the hub resources rather than from variables.
  hub_resource_group_name = azurerm_resource_group.this.name
  hub_vnet_name           = azurerm_virtual_network.this.name
  firewall_private_ip     = azurerm_firewall.this.ip_configuration[0].private_ip_address

  # Spoke-side inputs.
  spoke_resource_group_name           = var.spoke_resource_group_name
  spoke_vnet_address_space            = var.spoke_vnet_address_space
  privatelink_subnet_address_prefixes = var.privatelink_subnet_address_prefixes

  # Everything in the module waits for the hub resource group.
  depends_on = [
    resource.azurerm_resource_group.this
  ]
}

# Databricks workspace module, fed by the outputs of the spoke_vnet module.
module "spoke_databricks_workspace" {
  source = "../../modules/azure_vnet_injected_databricks_workspace"

  workspace_name = var.databricks_workspace_name
  location       = azurerm_virtual_network.this.location
  tags           = var.tags

  # Resource group, network, NSG and route table all come from the spoke
  # network module.
  databricks_resource_group_name = module.spoke_vnet.rg_name
  vnet_name                      = module.spoke_vnet.vnet_name
  vnet_id                        = module.spoke_vnet.vnet_id
  nsg_id                         = module.spoke_vnet.nsg_id
  route_table_id                 = module.spoke_vnet.route_table_id

  # Address ranges for the two Databricks subnets.
  public_subnet_address_prefixes  = var.public_subnet_address_prefixes
  private_subnet_address_prefixes = var.private_subnet_address_prefixes
}


# Unity Catalog module. It receives the hub resource group ID and the ID of
# the workspace created by the spoke_databricks_workspace module.
module "unity_catalog" {
  source = "../../modules/azure_uc"

  workspaces_to_associate = [module.spoke_databricks_workspace.databricks_workspace_id]
  resource_group_id       = azurerm_resource_group.this.id
}
15 changes: 15 additions & 0 deletions examples/test_azure_uc_data_exfiltration_protection/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Outputs of the hub-and-spoke example. Each one carries a description so it
# is self-documenting in generated module documentation.

output "resource_group_name" {
  description = "Name of the hub resource group created by this configuration."
  value       = azurerm_resource_group.this.name
}

output "virtual_network_name" {
  description = "Name of the hub virtual network created by this configuration."
  value       = azurerm_virtual_network.this.name
}

output "firewall_name" {
  description = "Name of the Azure Firewall deployed in the hub virtual network."
  value       = azurerm_firewall.this.name
}

output "workspace_url" {
  description = "Workspace URL exposed by the spoke_databricks_workspace module."
  value       = module.spoke_databricks_workspace.workspace_url
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# AzureRM provider with an empty features block, i.e. no feature settings are
# overridden.
provider "azurerm" {
  features {}
}

# Databricks provider pointed at the workspace URL exposed by the
# spoke_databricks_workspace module.
# NOTE(review): no credentials are set in this block - presumably they are
# picked up from the environment; confirm.
provider "databricks" {
  host = module.spoke_databricks_workspace.workspace_url
}
Loading

0 comments on commit 9697f60

Please sign in to comment.