diff --git a/examples/aws-databricks-modular-privatelink/.terraform.lock.hcl b/examples/aws-databricks-modular-privatelink/.terraform.lock.hcl deleted file mode 100644 index 88abb3f1..00000000 --- a/examples/aws-databricks-modular-privatelink/.terraform.lock.hcl +++ /dev/null @@ -1,115 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. - -provider "registry.terraform.io/databricks/databricks" { - version = "1.3.1" - hashes = [ - "h1:ftL4JdcmEuwiB8W2Rs6Et+M0eanEa8KwW0rvlLEeyAQ=", - "zh:0f549815379dfdcb2bd6e27959f49f8e5badbf192a83f633988f792605f0053d", - "zh:0f579435dc1607776f11095652c23ff17ec1ccf3e72dec038d06b7b6b7850221", - "zh:29a22ee31f9b68bd786993e5753f103cc861084f184d8503943aed050ddfad49", - "zh:2a6e0730a49a1ccc4cd4f96ea96113da0d25e30cfde632ac5d3246cfa0277016", - "zh:3b8fe58a743830ae59d0e12c30b3bb36afa4b974d7d2bc6a8b96d16436a2b838", - "zh:409e29eb10658b7c7dd0f8e851154e6bda1a940a3db13b14c4e8a4a69b591986", - "zh:87d4f641ae9a52ec340fbf3a3918c4a5a9f61b8ae36cea098db77812581c3da0", - "zh:b846c39ca0b4fd774ea5186693aaa8f94bc82034c51be07e10766e74986ea130", - "zh:bab7a9a42308f09ccf9a76248cc0c634ff4d2128a8fc3c622fd4e5d856aa3dce", - "zh:bc488d5ee46efd4c9e6149759133061cd995540835c3fe7a64af97d7ad5ae19c", - ] -} - -provider "registry.terraform.io/hashicorp/aws" { - version = "4.32.0" - constraints = "~> 4.0" - hashes = [ - "h1:d4aUL6/J+BFhh1/Nh2rgctt+dqf07H9PipRn297hIIo=", - "zh:062c30cd8bcf29f8ee34c2b2509e4e8695c2bcac8b7a8145e1c72e83d4e68b13", - "zh:1503fabaace96a7eea4d73ced36a02a75ec587760850e58162e7eff419dcbb31", - "zh:39a1fa36f8cb999f048bf0000d9dab40b8b0c77df35584fb08aa8bd6c5052dee", - "zh:471a755d43b51cd7be3e386cebc151ad8d548c5dea798343620476887e721882", - "zh:61ed56fab811e62b8286e606d003f7eeb7e940ef99bb49c1d283d91c0b748cc7", - "zh:80607dfe5f7770d136d5c451308b9861084ffad08139de8014e48672ec43ea3f", - "zh:863bf0a6576f7a969a89631525250d947fbb207d3d13e7ca4f74d86bd97cdda3", - 
"zh:9a8f2e77e4f99dbb618eb8ad17218a4698833754b50d46da5727323a2050a400", - "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:9b74ff6e638c2a470b3599d57c2081e0095976da0a54b6590884d571f930b53b", - "zh:da4fc553d50ae833d860ec95120e271c29b4cb636917ab5991327362b7486bb7", - "zh:f4b86e7df4e846a38774e8e648b41c5ebaddcefa913cfa1864568086b7735575", - ] -} - -provider "registry.terraform.io/hashicorp/http" { - version = "3.1.0" - hashes = [ - "h1:fmokxOn/hzBi7EGkOWgjDc+nK5rxQFf/rzL0T14SUms=", - "zh:04160b9c74dfe105f64678c0521279cda6516a3b8cdb6748078318af64563faf", - "zh:2d9b4df29aab50496b6371d925d6d6b3c45788850599fd7ba553411abc9c8326", - "zh:3d36344fae7cfafabfb7fd1108916d7251dcfd550d13b129c25437b43bc2e461", - "zh:58ea39aab145edb067f0fe183c2def1bfc93b57bd9ab0289074dba511bc17644", - "zh:6e2d491f02ba4e4134ca8a8cb7312b3a691bdad80a33a29f69d58a5740fade0c", - "zh:70a8d3fa67fd5a5fb5d9baba22be01986e38dd0f84f1e40f341fe55b491b0a03", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:88490f4c31bebc185f4eb7b8e3a79e3b5f92b1343f6b0c14a5c5d8c5e1de9261", - "zh:8a2ba55c5621e28faed582218213812803481765f8faea681c5c3edc61646889", - "zh:8c401d8e0c99d9733287c5ad1309692d5c7e166af6711164ad41e3579f48e45f", - "zh:ce344855648da2c575ceb7b3af18e98519d46629e6eb20358f022370745a76d2", - "zh:f9f9fe99000bc7c6b778ce23e5fe16375acad644aa1b4b4894b3cb2e9a2c7903", - ] -} - -provider "registry.terraform.io/hashicorp/local" { - version = "2.2.3" - hashes = [ - "h1:KmHz81iYgw9Xn2L3Carc2uAzvFZ1XsE7Js3qlVeC77k=", - "zh:04f0978bb3e052707b8e82e46780c371ac1c66b689b4a23bbc2f58865ab7d5c0", - "zh:6484f1b3e9e3771eb7cc8e8bab8b35f939a55d550b3f4fb2ab141a24269ee6aa", - "zh:78a56d59a013cb0f7eb1c92815d6eb5cf07f8b5f0ae20b96d049e73db915b238", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:8aa9950f4c4db37239bcb62e19910c49e47043f6c8587e5b0396619923657797", - "zh:996beea85f9084a725ff0e6473a4594deb5266727c5f56e9c1c7c62ded6addbb", - 
"zh:9a7ef7a21f48fabfd145b2e2a4240ca57517ad155017e86a30860d7c0c109de3", - "zh:a63e70ac052aa25120113bcddd50c1f3cfe61f681a93a50cea5595a4b2cc3e1c", - "zh:a6e8d46f94108e049ad85dbed60354236dc0b9b5ec8eabe01c4580280a43d3b8", - "zh:bb112ce7efbfcfa0e65ed97fa245ef348e0fd5bfa5a7e4ab2091a9bd469f0a9e", - "zh:d7bec0da5c094c6955efed100f3fe22fca8866859f87c025be1760feb174d6d9", - "zh:fb9f271b72094d07cef8154cd3d50e9aa818a0ea39130bc193132ad7b23076fd", - ] -} - -provider "registry.terraform.io/hashicorp/random" { - version = "3.4.3" - hashes = [ - "h1:tL3katm68lX+4lAncjQA9AXL4GR/VM+RPwqYf4D2X8Q=", - "zh:41c53ba47085d8261590990f8633c8906696fa0a3c4b384ff6a7ecbf84339752", - "zh:59d98081c4475f2ad77d881c4412c5129c56214892f490adf11c7e7a5a47de9b", - "zh:686ad1ee40b812b9e016317e7f34c0d63ef837e084dea4a1f578f64a6314ad53", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:84103eae7251384c0d995f5a257c72b0096605048f757b749b7b62107a5dccb3", - "zh:8ee974b110adb78c7cd18aae82b2729e5124d8f115d484215fd5199451053de5", - "zh:9dd4561e3c847e45de603f17fa0c01ae14cae8c4b7b4e6423c9ef3904b308dda", - "zh:bb07bb3c2c0296beba0beec629ebc6474c70732387477a65966483b5efabdbc6", - "zh:e891339e96c9e5a888727b45b2e1bb3fcbdfe0fd7c5b4396e4695459b38c8cb1", - "zh:ea4739860c24dfeaac6c100b2a2e357106a89d18751f7693f3c31ecf6a996f8d", - "zh:f0c76ac303fd0ab59146c39bc121c5d7d86f878e9a69294e29444d4c653786f8", - "zh:f143a9a5af42b38fed328a161279906759ff39ac428ebcfe55606e05e1518b93", - ] -} - -provider "registry.terraform.io/hashicorp/time" { - version = "0.8.0" - hashes = [ - "h1:m9zlQLy7VbjhI4unRTXvrkdV6gcHu7qegEorLWx3pAM=", - "zh:02eabf4c6239c5b950cc99bb214b2c55e8259d911bcb1a1b26988a0227fe10d4", - "zh:05220f907b274347dec0ffa8383becc6a3640324bc5d60e2b938d5429ed81f5e", - "zh:14165bc5a859c9d617fda2cedaeec1b7a20f8590969faa24aa34c1fc273c23b9", - "zh:1abe696cbe17c070ac98745a357760827bc49ff8a6647b9e1a5cb52010edcbe0", - "zh:20ec0ad2dec862fb6412047f4855bbd79d1a2e18a149088b337805f9b3766974", - 
"zh:3d70d4836b35b4ec9477d49685f6773cc765aea679d19cbeeeb485e2185f620a", - "zh:4137272743250ac557dd8c2ba92c93aa21cf9c85edfa7fbe07a3a94c9e9783a7", - "zh:525304ba8fd0abcc1d767b47114b6dfaf74d2a0afe0eaa656a38e81cc2651313", - "zh:76241458be0613fabcf347068af9ed846f829ba4e683e10beca631be26312db2", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:85f2b4caaf0485c5346a576a2c7a5b1e155b1b72f95f70bfbc80e233e6916622", - "zh:f93d3b0b6553f5a438312ff2b46025b67786f7b76b1ea833a4c72cb29edc1ad2", - ] -} diff --git a/examples/aws-databricks-modular-privatelink/README.md b/examples/aws-databricks-modular-privatelink/README.md index a681f331..3f9ff56a 100644 --- a/examples/aws-databricks-modular-privatelink/README.md +++ b/examples/aws-databricks-modular-privatelink/README.md @@ -1,281 +1,206 @@ -Deploy Multiple AWS Databricks Workspace with CMK, Customer-managed VPC, Private Links, IP Access Lists +Config-driven AWS Databricks Workspaces deployment ========================= -In this example, we created modules and root level template to deploy multiple (e.g. 10+) E2 Databricks workspaces at scale easily. Users of this template minimally should do these: -1. Supply credentials (aws+databricks) and configuration variables for each workspaces -2. Edit the locals block in `main.tf` to decide what & how many workspaces to deploy -3. Run `terraform init` and `terraform apply` to deploy 1 or more workspaces into your VPC. -4. Optionally, take the outputs files in `/artifacts` and patch each workspace with IP Access List. - -This modular design also allows customer to deploy, manage and delete `individual` workspace(s) easily, with minimal configuration needed. This template takes heavy reference (e.g. CMK module + Private Links) from https://github.com/andyweaves/databricks-terraform-e2e-examples from andrew.weaver@databricks.com and this repo is adapted to meet specific customer requirements. 
+This example provides a config-driven approach to deploy AWS Databricks environments, including AWS Infra, Databricks workspaces, and UC catalogs. This example takes references from Databricks SRA template [https://github.com/databricks/terraform-databricks-sra/tree/main]. + +You can easily deploy: +1. Multiple VPC as dedicated environments, with each VPC hosting multiple Databricks workspaces compute plane. +2. All workspaces will use Databricks backend private link, CMK. +3. Multiple UC catalogs with dedicated IAM role and External Locations, S3 for storing datasets into isolated infra. ## Architecture -> To be added - LucidChart brewing... - -## Project Folder Structure - - . - ├── iam.tf - ├── instance_profile.tf - ├── main.tf - ├── outputs.tf - ├── privatelink.tf - ├── providers.tf - ├── variables.tf - ├── vpc.tf - ├── artifacts # stores workspaces URL and other info for next stage deployment - ├── workspace_1_deployment.json - ├── ... - ├── modules - ├── databricks_cmk - ├── data.tf - ├── main.tf - ├── outputs.tf - ├── providers.tf - ├── variables.tf - ├── mws_workspace - ├── main.tf - ├── variables.tf - ├── outputs.tf - ├── modules - ├── mws_network - ├── main.tf - ├── variables.tf - ├── outputs.tf - ├── mws_storage - ├── main.tf - ├── variables.tf - ├── outputs.tf - - -## Get Started - -> Step 1: Clone this repo to local, set environment variables for `aws` and `databricks` providers authentication: - -```bash -export TF_VAR_databricks_account_client_id=your_account_level_spn_application_id -export TF_VAR_databricks_account_client_secret=your_account_level_spn_secret -export TF_VAR_databricks_account_id=your_databricks_account_id +![Architecture](./artifacts/aws_db_config_driven.jpeg) -export AWS_ACCESS_KEY_ID=your_aws_role_access_key_id -export AWS_SECRET_ACCESS_KEY=your_aws_role_secret_access_key -``` +## Getting Started -> Step 2: Modify `variables.tf`, for each workspace you need to write a variable block like this, all attributes are required: +We 
provide 2 pipelines to deploy: +- Account level resources (e.g. workspaces, metastore, identities). +- Workspace level resources (e.g. clusters), UC resources (e.g. catalogs, external locations) are deployed using workspace level provider. -```terraform -variable "workspace_1_config" { - default = { - private_subnet_pair = { subnet1_cidr = "10.109.6.0/23", subnet2_cidr = "10.109.8.0/23" } - workspace_name = "test-workspace-1" - prefix = "ws1" // prefix decides subnets name - region = "ap-southeast-1" - root_bucket_name = "test-workspace-1-rootbucket" - block_list = ["58.133.93.159"] - allow_list = [] // if allow_list empty, all public IP not blocked by block_list are allowed - tags = { - "Name" = "test-workspace-1-tags", - "Env" = "test-ws-1" // add more tags if needed, tags will be applied on databricks subnets and root s3 bucket, but workspace objects like clusters tag needs to be defined in workspace config elsewhere - } - } -} -``` +You can separate out these 2 pipelines into different projects, instead of keeping everything in the same repo folder. -Since we are using CMK (customer managed key) for encryption on root S3 bucket and Databricks managed resources, you also need to provide an AWS IAM ARN for `cmk_admin`. The format will be: `arn:aws:iam::123456:user/xxx`. You need to create this user and assign KMS admin role to it. +> Step 1.1: Manually create a service principal with account admin role on Account Console, generate client secret; and note down client_id and client_secret values. 
+> Step 1.2: Clone the repo, set the following env vars for `aws` and `databricks` provider authentication:
-> Step 3: Modify `main.tf` - locals block, add/remove your workspace config var inside locals, like this:
+```bash
+export TF_VAR_client_id=xxxx # your databricks spn client id
+export TF_VAR_client_secret=xxxx # your databricks spn client value
+export TF_VAR_databricks_account_id=xxxx
-```terraform
-workspace_confs = {
-  workspace_1 = var.workspace_1_config
-  workspace_2 = var.workspace_2_config
-  workspace_3 = var.workspace_3_config
-}
+export TF_VAR_aws_account_id=xxxx
+export AWS_ACCESS_KEY_ID=your_aws_role_access_key_id
+export AWS_SECRET_ACCESS_KEY=your_aws_role_secret_access_key
```
-> Step 4: Check your VPC and subnet CIDR, then run `terraform init` and `terraform apply` to deploy your workspaces; this will deploy multiple E2 workspaces into your VPC.
-We are calling the module `mws_workspace` to create multiple workspaces by batch, you should treat this concept as a group of workspaces that share the same VPC in a region. If you want to deploy workspaces in different VPCs, you need to create multiple `mws_workspace` instances.
-In the default setting, this template creates one VPC (with one public subnet and one private subnet for hosting VPCEs). Each incoming workspace will add 2 private subnets into this VPC. If you need to create multiple VPCs, you should copy paste the VPC configs and change accordingly, or you can wrap VPC configs into a module, we leave this to you.
-At this step, your workspaces deployment and VPC networking infra should have been successfully deployed and you will have `n` config json files for `n` workspaces deployed, under `/artifacts` folder, to be used in another Terraform project to deploy workspace objects including IP Access List.
+Then in root level `variables.tf`, change the region default value to your region.
+With env vars supplied and region set, we are ready to deploy both account and workspace level pipelines in sequence: + +### Deploy Account Level Pipeline: + +> Step 2.1: In root folder `./configs`, create your own yaml file according to your requirements, example yaml files (`config-1.yaml`) are provided for your reference. Each environment means its dedicated VPC, aws infra and workspaces, below are the definitions: + +```yml +# VPC Configuration +vpc: + name: "demo-vpc1" + cidr: "10.0.0.0/16" + +workspace_number: 2 # how many workspaces & security groups will be created +resource_prefix: "tf-vpc" + +subnets: + number_of_azs: 2 + private: + - name: "private-subnet-1" + cidr: "10.0.1.0/24" + ... + public: + - name: "public-subnet-1" + cidr: "10.0.101.0/24" + ... + intra: + - name: "privatelink-subnet-1" + cidr: "10.0.103.0/27" + ... + +scc_relay: "com.amazonaws.vpce.ap-southeast-1.vpce-svc-0557367c6fc1a0c5c" +workspace: "com.amazonaws.vpce.ap-southeast-1.vpce-svc-02535b257fc253ff4" + +# Metastore Deployment Flag +deploy_metastore: "true" +metastore_admin_group_name: "metastore_admin_group" +deploy_log_delivery: "true" + +# Optional: Add any other configuration parameters here +tags: + Environment: "Development" + Project: "Databricks" + ManagedBy: "Terraform" +``` -## Private Links +- `workspace_number: 2`: deploy multiple workspaces in the VPC. Each workspace will get a dedicated Security Group. +- `subnets`: For `n` `number_of_azs` (at least 2 az required), each workspace will be assigned with `n` private subnets in sequence of the yaml definition. In the above example, workspace 1 will take private subnet 1, 2; workspace 2 will take private subnet 3, 4; in each AZ we will deploy 1 public subnet to host NAT gateway. +- `scc_relay` and `workspace`: these are AWS Databricks private link services, you can find your region's value [here](https://docs.databricks.com/aws/en/resources/ip-domain-region#privatelink). 
+- `deploy_metastore`: whether to deploy regional UC metastore altogether; if it's a first time deployment and no existing UC metastore in your selected region, choose `true`. All other values are considered `false`. If you already have metastore, provide these 2 attributes instead, check `config-2.yaml` for example: + - `deploy_metastore: "false"` + - `existing_metastore_id: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"` +- `metastore_admin_group_name`: name of the metastore admin group. +- `deploy_log_delivery`: (optional) By default this will deploy account level log delivery, hence only need to specify this in 1 environment yaml. -In this example, we used 1 VPC for all workspaces, and we used backend VPCE for Databricks clusters to communicate with control plane. All workspaces deployed into the same VPC will share one pair of VPCEs (one for relay, one for rest api), typically since VPCEs can provide considerable bandwidth, you just need one such pair of VPCEs for all workspaces in each region. For HA setup, you can build VPCEs into multiple az as well. +> Step 2.2: In `main.tf`, update the local block to load the yaml files. Also update the `outputs.tf` accordingly. Below setting will load 2 yaml files and deploy 2 environments according to yaml files details. -## IP Access List +```terraform +locals { + environments = { + env1 = { + config_file_path = "${path.module}/configs/config-1.yaml" + }, + env2 = { + config_file_path = "${path.module}/configs/config-2.yaml" + }, + # Add more environments as needed + } -For all the workspaces in this template, we allowed access from the Internet, but we restrict access using IP access list. Each workspace can be customized with `allow_list` and `block_list` in variables block. 
+  # Parse each config file once and store the results
+  config_files = {
+    for env_key, env in local.environments : env_key => yamldecode(file(env.config_file_path))
+  }
+}
+```
-The process of IP access list management is separated from Terraform process of workspace deployment. This is because we want:
-1. To keep a clean cut between workspace deployment and workspace management.
-2. It is general good practice to separate workspace deployment and workspace management.
-3. To keep workspace objects deployment in separate terraform project, not to risk leaving orphaned resources and ruins your workspace deployment (e.g. changed provider etc).
+You can also use the same yaml-based config to manage users and groups at account level, need to modify `configs/account_users.yaml` accordingly.
+
+We provide an example `account_users.yaml` and explanation below:
+
+```yml
+# Create users and manage in TF
+new_users:
+  "new.admin@example.com":
+    display_name: "New Admin User"
+  ...
+
+# Import existing users that you want to interact with terraform (add them into group definitions below)
+existing_users:
+  - "your_existing_user@example.com"
+
+# Explicit section for metastore admins
+tf_admin_groups:
+  metastore_admin_group:
+    display_name: "metastore_admin_group"
+    members:
+      new_metastore_admins:
+        - "new.admin@example.com"
+      existing_metastore_admins:
+        - "your_existing_metastore_admin@example.com"
+
+# Group users into non Admin groups, each user can exist in 1 or multiple groups;
+# Emails must exist in the new_users or existing_users section first
+tf_non_admin_groups:
+  data_engineers:
+    display_name: "TF Managed Data Engineers"
+    members:
+      - "new.de@example.com"
+      - "new.admin@example.com"
+
+  data_scientists:
+    display_name: "TF Managed Data Scientists"
+    members:
+      - "new.ds@example.com"
+
+  analysts:
+    display_name: "TF Managed Analysts"
+    members:
+      - "new.analyst@example.com"
+```
-After you have deployed your workspaces using this template
(`aws_databricks_modular_privatelink`), you will have workspace host URLs saved as local file under `/artifacts`. Those files are for you to input to the next Terraform workspace management process, and to patch the workspace IP access list. +- `new_users`: Create these users in TF. +- `existing_users`: Bring in existing users to interact with TF. +- `tf_admin_groups`: Specify group membership for metastore admins. +- `tf_non_admin_groups`: Create new groups, and assign anyone in `new_users` or `existing_users`. -> IP Access List Decision Flow +The group name in `metastore_admin_group` must match the environment yaml file (like `config-1.yaml`). -![alt text](https://raw.githubusercontent.com/databricks/terraform-databricks-examples/main/examples/aws-databricks-modular-privatelink/images/ip-access-lists-flow.png?raw=true) +Then run `terraform init` and `terraform apply` and confirm to deploy the resources, based on your yaml files. For each dedicated environment (yaml file), there will be `n` workspaces, sharing VPC endpoints, and using databricks backend private link. Egress is allowed through NAT, you can also extend the architecture with egress firewall. -> Example - blocked access from workspace: my phone is blocked to access the workspace, since the public IP was in the workspace's block list. +### Deploy Workspace Level Pipeline: -![alt text](https://raw.githubusercontent.com/databricks/terraform-databricks-examples/main/examples/aws-databricks-modular-privatelink/images/ip-access-list-block.png?raw=true) +In this pipeline, we deploy: +1. Multiple UC catalogs with dedicated External locations (mapping to dedicated S3). +2. Pre-create interactive clusters in workspace for users; as example of deploying workspace resource. -> Recommended to keep IP Access List management in a separate Terraform project, to avoid orphaned resources. 
(Similar error below) +![UC](./artifacts/uc-starter.png) -![alt text](https://raw.githubusercontent.com/databricks/terraform-databricks-examples/main/examples/aws-databricks-modular-privatelink/images/orphaned-resources.png?raw=true) +First, `cd` into `databricks_workspace_tf_pipeline/prerequisite_step/` folder, run terraform init and apply to generate a yaml file in `databricks_workspace_tf_pipeline/configs/`, this will be the catalog specification. -## Tagging +Then move to `databricks_workspace_tf_pipeline` directory, supply values for the variables, make sure you point the databricks_host to the specific workspace URL you are deploying resources into. -We added custom tagging options in `variables.tf` to tag your aws resources: in each workspace's config variable map, you can supply with any number of tags, and these tags will propagate down to resources related to that workspace, like root bucket s3 and the 2 subnets. Note that aws databricks itself does not support tagging, also the abstract layer of `storage_configuration`, and `network_configuration` does not support tagging. Instead, if you need to tag/enforce certain tags for `clusters` and `pools`, do it in `workspace management` terraform projects, (not this directory that deploys workspaces). +One quick way is to still keep the env vars you supplied before, driven by the same service principal: -## Terraform States Files stored in remote S3 -We recommend using remote storage, like S3, for state storage, instead of using default local backend. If you have already applied and retains state files locally, you can also configure s3 backend then apply, it will migrate local state file content into S3 bucket, then local state file will become empty. As you switch the backends, state files are migrated from `A` to `B`. 
+```bash +export TF_VAR_client_id=xxxx # your databricks spn client id +export TF_VAR_client_secret=xxxx # your databricks spn client value +export TF_VAR_databricks_account_id=xxxx -```terraform -terraform { - backend "s3" { - # Replace this with your bucket name! - bucket = "terraform-up-and-running-state-unique-hwang" - key = "global/s3/terraform.tfstate" - region = "ap-southeast-1" - # Replace this with your DynamoDB table name! - dynamodb_table = "terraform-up-and-running-locks" - encrypt = true - } -} +export TF_VAR_aws_account_id=xxxx +export AWS_ACCESS_KEY_ID=your_aws_role_access_key_id +export AWS_SECRET_ACCESS_KEY=your_aws_role_secret_access_key ``` -You should create the infra for remote backend in another Terraform Project, like the `aws_remote_backend_infra` project in this repo's root level - https://github.com/hwang-db/tf_aws_deployment/tree/main/aws_remote_backend_infra, since we want to separate the backend infra out from any databricks project infra. As shown below, you create a separate set of tf scripts and create the S3 and DynamoDB Table. Then all other tf projects can store their state files in this remote backend. - -![alt text](https://raw.githubusercontent.com/databricks/terraform-databricks-examples/main/examples/aws-databricks-modular-privatelink/images/tf-remote-s3-backend.png?raw=true) - -Tips: If you want to destroy your backend infra (S3+DynamoDB), since your state files of S3 and backend infra are stored in that exact S3, to avoid falling into chicken and egg problem, you need to follow these steps: -1. Comment out remote backend and migrate states to local backend -2. Comment out all backend resources configs, run apply to get rid of them. Or you can run destroy. 
-
-## Common Actions
-
-### To add specific workspace(s)
+And then create a `terraform.tfvars` file to contain the rest of the variables' values as below:
-You just need to supply with each workspace's configuration in root level `variables.tf`, similar to the examples given.
-Then you need to add the workspaces you want into locals block and run apply.
+```
+databricks_host = "https://xxxxxx.cloud.databricks.com"
+databricks_users = ["new.de@example.com", "new.ds@example.com", "new.analyst@example.com"]
+```
-### To delete specific workspace(s)
+Each user in `databricks_users` list will get assigned a single-node interactive cluster for use, with `CAN_RESTART` permission.
-Do Not run `terraform destroy` or `terraform destroy -target` for the purpose of deleting resources. Instead, you should just remove resources from your `.tf` scripts and run `terraform apply`.
+Then run terraform init and apply, log into your workspace, and you will see deployed catalogs and external locations.
-You just need to remove the workspace config from `main.tf` - locals block, then run `terraform apply` to delete the workspace. For example, to delete `workspace_3`, you need to remove the following lines from `main.tf` - locals block, it is optional to remove the same from variable block in `variables.tf`:
+![UC](./artifacts/uc.gif)
-```terraform
-workspace_3 = var.workspace_3_config
-```
+The catalogs deployed are configured to be accessible from the current workspace, and can be further configured with bindings to different workspaces.
-Then run `terraform apply`, workspace_3 will be deleted.
- -### Configure IAM roles, S3 access policies and Instance Profile for clusters - -This template illustrates the traditional method of creating Instance Profile to grant cluster with S3 bucket access, see [original official guide](https://docs.databricks.com/administration-guide/cloud-configurations/aws/instance-profiles.html) - -The sample script in `instance_profile.tf` will help you create the underlying IAM role and policies for you to create instance profile at workspace level, you will find the `arn` from tf output, you can then manually take the value and configure at workspace admin setting page like below: - -![alt text](https://raw.githubusercontent.com/databricks/terraform-databricks-examples/main/examples/aws-databricks-modular-privatelink/images/instance-profile.png?raw=true) - -Next you need to configure permissions for users/groups to use this instance profile to spin up clusters, and the cluster will be able to access the S3 specified in the instance profile's IAM role's policy. - - -### Grant Access to other users to use this instance profile - -Deploying instance profile to workspace is obviously a workspace configuration process, and we suggest you write the relevant tf scripts in workspace management project (such as inside `aws_workspace_config`), not in this workspace deployment project. The screenshot in the above step is a manual version of adding instance profile inside your workspace. - -By default, the instance profile you created from the above steps is only accessible to its creator and admin group. Thus you also need to do access control (permissions) and specify who can use such instance profile to spin up clusters. 
See sample tf script and tutorial here: -[Tutorial](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/instance_profile#granting-access-to-all-users) - - - -## Requirements - -| Name | Version | -| ------------------------------------------------------- | ------- | -| [aws](#requirement\_aws) | ~> 4.0 | - -## Providers - -| Name | Version | -| ---------------------------------------------------------------------------------- | ------- | -| [aws](#provider\_aws) | 4.32.0 | -| [databricks](#provider\_databricks) | 1.3.1 | -| [databricks.mws](#provider\_databricks.mws) | 1.3.1 | -| [http](#provider\_http) | 3.1.0 | -| [local](#provider\_local) | 2.2.3 | -| [random](#provider\_random) | 3.4.3 | -| [time](#provider\_time) | 0.8.0 | - -## Modules - -| Name | Source | Version | -| -------------------------------------------------------------------------------------------------- | ------------------------ | ------- | -| [databricks\_cmk](#module\_databricks\_cmk) | ./modules/databricks_cmk | n/a | -| [workspace\_collection](#module\_workspace\_collection) | ./modules/mws_workspace | n/a | - -## Resources - -| Name | Type | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------- | -| [aws_eip.nat_gateway_elastic_ips](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eip) | resource | -| [aws_iam_role.cross_account_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | -| [aws_iam_role_policy.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | -| [aws_internet_gateway.igw](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/internet_gateway) | resource | -| 
[aws_nat_gateway.nat_gateways](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/nat_gateway) | resource | -| [aws_route_table.pl_subnet_rt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route_table) | resource | -| [aws_route_table.public_route_table](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route_table) | resource | -| [aws_route_table_association.dataplane_vpce_rtb](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route_table_association) | resource | -| [aws_route_table_association.public_route_table_associations](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route_table_association) | resource | -| [aws_security_group.privatelink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | -| [aws_security_group.sg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | -| [aws_subnet.privatelink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/subnet) | resource | -| [aws_subnet.public_subnets](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/subnet) | resource | -| [aws_vpc.mainvpc](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc) | resource | -| [aws_vpc_endpoint.backend_relay](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc_endpoint) | resource | -| [aws_vpc_endpoint.backend_rest](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc_endpoint) | resource | -| [databricks_mws_credentials.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_credentials) | resource | -| [databricks_mws_vpc_endpoint.backend_rest_vpce](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_vpc_endpoint) | resource | -| 
[databricks_mws_vpc_endpoint.relay](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_vpc_endpoint) | resource | -| [local_file.deployment_information](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | -| [random_string.naming](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/string) | resource | -| [time_sleep.wait](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource | -| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | -| [databricks_aws_assume_role_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/aws_assume_role_policy) | data source | -| [databricks_aws_crossaccount_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/aws_crossaccount_policy) | data source | -| [http_http.my](https://registry.terraform.io/providers/hashicorp/http/latest/docs/data-sources/http) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -| ----------------------------------------------------------------------------------------------------------------------- | ----------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------: | -| [cmk\_admin](#input\_cmk\_admin) | cmk | `string` | `"arn:aws:iam::026655378770:user/hao"` | no | -| 
[databricks\_account\_id](#input\_databricks\_account\_id) | n/a | `string` | n/a | yes | -| [databricks\_account\_password](#input\_databricks\_account\_client\_secret) | n/a | `string` | n/a | yes | -| [databricks\_account\_client_id](#input\_databricks\_account\_client\_id) | n/a | `string` | n/a | yes | -| [privatelink\_subnets\_cidr](#input\_privatelink\_subnets\_cidr) | n/a | `list(string)` |
[
"10.109.4.0/23"
]
| no | -| [public\_subnets\_cidr](#input\_public\_subnets\_cidr) | n/a | `list(string)` |
[
"10.109.2.0/23"
]
| no | -| [region](#input\_region) | n/a | `string` | `"ap-southeast-1"` | no | -| [relay\_vpce\_service](#input\_relay\_vpce\_service) | n/a | `string` | `"com.amazonaws.vpce.ap-southeast-1.vpce-svc-0557367c6fc1a0c5c"` | no | -| [tags](#input\_tags) | n/a | `map` | `{}` | no | -| [vpc\_cidr](#input\_vpc\_cidr) | n/a | `string` | `"10.109.0.0/17"` | no | -| [workspace\_1\_config](#input\_workspace\_1\_config) | n/a | `map` |
{
"allow_list": [
"65.184.145.97"
],
"block_list": [
"58.133.93.159"
],
"prefix": "ws1",
"private_subnet_pair": {
"subnet1_cidr": "10.109.6.0/23",
"subnet2_cidr": "10.109.8.0/23"
},
"region": "ap-southeast-1",
"root_bucket_name": "test-workspace-1-rootbucket",
"tags": {
"Env": "test-ws-1",
"Name": "test-workspace-1-tags"
},
"workspace_name": "test-workspace-1"
}
| no | -| [workspace\_2\_config](#input\_workspace\_2\_config) | n/a | `map` |
{
"allow_list": [
"65.184.145.97"
],
"block_list": [
"54.112.179.135",
"195.78.164.130"
],
"prefix": "ws2",
"private_subnet_pair": {
"subnet1_cidr": "10.109.10.0/23",
"subnet2_cidr": "10.109.12.0/23"
},
"region": "ap-southeast-1",
"root_bucket_name": "test-workspace-2-rootbucket",
"tags": {
"Name": "test-workspace-2-tags"
},
"workspace_name": "test-workspace-2"
}
| no | -| [workspace\_vpce\_service](#input\_workspace\_vpce\_service) | n/a | `string` | `"com.amazonaws.vpce.ap-southeast-1.vpce-svc-02535b257fc253ff4"` | no | - -## Outputs - -| Name | Description | -| -------------------------------------------------------------------------------------- | ----------- | -| [arn](#output\_arn) | n/a | -| [databricks\_hosts](#output\_databricks\_hosts) | n/a | - +You now have deployed multiple VPCs, with multiple workspaces, and a set of catalogs with isolated S3, external locations, storage credentials to start building data pipelines. \ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/artifacts/README.md b/examples/aws-databricks-modular-privatelink/artifacts/README.md deleted file mode 100644 index f8cf95a2..00000000 --- a/examples/aws-databricks-modular-privatelink/artifacts/README.md +++ /dev/null @@ -1,3 +0,0 @@ -## Do not delete this file - -This readme is a dummy file to keep the file structure in sync between local IDE and repo. 
\ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/artifacts/aws_db_config_driven.jpeg b/examples/aws-databricks-modular-privatelink/artifacts/aws_db_config_driven.jpeg new file mode 100644 index 00000000..0496e08b Binary files /dev/null and b/examples/aws-databricks-modular-privatelink/artifacts/aws_db_config_driven.jpeg differ diff --git a/examples/aws-databricks-modular-privatelink/artifacts/uc-starter.png b/examples/aws-databricks-modular-privatelink/artifacts/uc-starter.png new file mode 100644 index 00000000..b40d6c2e Binary files /dev/null and b/examples/aws-databricks-modular-privatelink/artifacts/uc-starter.png differ diff --git a/examples/aws-databricks-modular-privatelink/artifacts/uc.gif b/examples/aws-databricks-modular-privatelink/artifacts/uc.gif new file mode 100644 index 00000000..4cd39bdb Binary files /dev/null and b/examples/aws-databricks-modular-privatelink/artifacts/uc.gif differ diff --git a/examples/aws-databricks-modular-privatelink/configs/account_users.yaml b/examples/aws-databricks-modular-privatelink/configs/account_users.yaml new file mode 100644 index 00000000..6b3b3236 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/configs/account_users.yaml @@ -0,0 +1,48 @@ +# Create users and manage in TF +new_users: + "new.admin@example.com": + display_name: "New Admin User" + + "new.de@example.com": + display_name: "New Data Engineer" + + "new.ds@example.com": + display_name: "New Data Scientist" + + "new.analyst@example.com": + display_name: "New Analyst" + +# Import existing users that you want to interact with terraform (add them into group definitions below) +existing_users: + - "your_existing_user@example.com" + +# Explicit section for metastore admins +tf_admin_groups: + metastore_admin_group: + display_name: "metastore_admin_group" + members: + new_metastore_admins: + - "new.admin@example.com" + existing_metastore_admins: + - "your_existing_user@example.com" + +# Group users into non Admin 
groups, each user can exist in 1 or multiple groups; +# Emails must exist in the new_users or existing_users section first +tf_non_admin_groups: + data_engineers: + display_name: "TF Managed Data Engineers" + members: + - "new.de@example.com" + - "new.admin@example.com" + + data_scientists: + display_name: "TF Managed Data Scientists" + members: + - "new.ds@example.com" + - "new.de@example.com" + - "your_existing_user@example.com" + + analysts: + display_name: "TF Managed Analysts" + members: + - "new.analyst@example.com" \ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/configs/config-1.yaml b/examples/aws-databricks-modular-privatelink/configs/config-1.yaml new file mode 100644 index 00000000..1686d1ea --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/configs/config-1.yaml @@ -0,0 +1,43 @@ +# VPC Configuration +vpc: + name: "demo-vpc-1" + cidr: "10.0.0.0/16" + +workspace_number: 2 +resource_prefix: "tf-vpc" + +subnets: + number_of_azs: 2 + private: + - name: "private-subnet-1" + cidr: "10.0.1.0/24" + - name: "private-subnet-2" + cidr: "10.0.2.0/24" + - name: "private-subnet-3" + cidr: "10.0.3.0/24" + - name: "private-subnet-4" + cidr: "10.0.4.0/24" + public: + - name: "public-subnet-1" + cidr: "10.0.101.0/24" + - name: "public-subnet-2" + cidr: "10.0.102.0/24" + intra: + - name: "privatelink-subnet-1" + cidr: "10.0.103.0/27" + - name: "privatelink-subnet-2" + cidr: "10.0.104.0/27" + +scc_relay: "com.amazonaws.vpce.ap-southeast-1.vpce-svc-0557367c6fc1a0c5c" +workspace: "com.amazonaws.vpce.ap-southeast-1.vpce-svc-02535b257fc253ff4" + +# Metastore Deployment Flag +deploy_metastore: "true" +metastore_admin_group_name: "metastore_admin_group" +deploy_log_delivery: "true" + +# Optional: Add any other configuration parameters here +tags: + Environment: "Development" + Project: "Databricks" + ManagedBy: "Terraform" \ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/configs/config-2.yaml 
b/examples/aws-databricks-modular-privatelink/configs/config-2.yaml new file mode 100644 index 00000000..93c632af --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/configs/config-2.yaml @@ -0,0 +1,39 @@ +# VPC Configuration +vpc: + name: "demo-vpc-2" + cidr: "10.0.0.0/16" + +workspace_number: 1 +resource_prefix: "tf-sec-vpc" + +subnets: + number_of_azs: 2 + private: + - name: "private-subnet-1" + cidr: "10.0.1.0/24" + - name: "private-subnet-2" + cidr: "10.0.2.0/24" + + public: + - name: "public-subnet-1" + cidr: "10.0.101.0/24" + - name: "public-subnet-2" + cidr: "10.0.102.0/24" + + intra: + - name: "privatelink-subnet-1" + cidr: "10.0.103.0/27" + - name: "privatelink-subnet-2" + cidr: "10.0.104.0/27" + +scc_relay: "com.amazonaws.vpce.ap-southeast-1.vpce-svc-0557367c6fc1a0c5c" +workspace: "com.amazonaws.vpce.ap-southeast-1.vpce-svc-02535b257fc253ff4" + +deploy_metastore: "false" +existing_metastore_id: "xxxxx-xxxx-xxxx-xxxx-xxxxxx" + +# Optional: Add any other configuration parameters here +tags: + Environment: "Development" + Project: "Databricks" + ManagedBy: "Terraform" \ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/data.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/cmk.tf similarity index 53% rename from examples/aws-databricks-modular-privatelink/modules/databricks_cmk/data.tf rename to examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/cmk.tf index 6a832140..2a5b70fc 100644 --- a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/data.tf +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/cmk.tf @@ -1,3 +1,6 @@ +# CMK For Managed Services (notebook, sql queries, etc) +data "aws_caller_identity" "current" {} + data "aws_iam_policy_document" "databricks_managed_services_cmk" { version = "2012-10-17" statement { @@ -5,7 +8,7 @@ data "aws_iam_policy_document" 
"databricks_managed_services_cmk" { effect = "Allow" principals { type = "AWS" - identifiers = [var.cmk_admin] + identifiers = [data.aws_caller_identity.current.account_id] } actions = ["kms:*"] resources = ["*"] @@ -25,6 +28,26 @@ data "aws_iam_policy_document" "databricks_managed_services_cmk" { } } +resource "aws_kms_key" "managed_services_customer_managed_key" { + policy = data.aws_iam_policy_document.databricks_managed_services_cmk.json +} + +resource "aws_kms_alias" "managed_services_customer_managed_key_alias" { + name = "alias/managed-services-customer-managed-key-alias-${var.resource_prefix}" + target_key_id = aws_kms_key.managed_services_customer_managed_key.key_id +} + +resource "databricks_mws_customer_managed_keys" "managed_services" { + provider = databricks.mws + account_id = var.databricks_account_id + aws_key_info { + key_arn = aws_kms_key.managed_services_customer_managed_key.arn + key_alias = aws_kms_alias.managed_services_customer_managed_key_alias.name + } + use_cases = ["MANAGED_SERVICES"] +} + +# CMK For DBFS and EBS data "aws_iam_policy_document" "databricks_storage_cmk" { version = "2012-10-17" statement { @@ -32,7 +55,7 @@ data "aws_iam_policy_document" "databricks_storage_cmk" { effect = "Allow" principals { type = "AWS" - identifiers = [var.cmk_admin] + identifiers = [data.aws_caller_identity.current.account_id] } actions = ["kms:*"] resources = ["*"] @@ -77,7 +100,7 @@ data "aws_iam_policy_document" "databricks_storage_cmk" { effect = "Allow" principals { type = "AWS" - identifiers = [var.cross_account_role_arn] + identifiers = [for i in module.workspace_credential : i.cross_account_role_arn] // a list of workspace cross account iam roles } actions = [ "kms:Decrypt", @@ -93,3 +116,22 @@ data "aws_iam_policy_document" "databricks_storage_cmk" { } } } + +resource "aws_kms_key" "storage_customer_managed_key" { + policy = data.aws_iam_policy_document.databricks_storage_cmk.json +} + +resource "aws_kms_alias" 
"storage_customer_managed_key_alias" { + name = "alias/storage-customer-managed-key-alias-${var.resource_prefix}" + target_key_id = aws_kms_key.storage_customer_managed_key.key_id +} + +resource "databricks_mws_customer_managed_keys" "storage" { + provider = databricks.mws + account_id = var.databricks_account_id + aws_key_info { + key_arn = aws_kms_key.storage_customer_managed_key.arn + key_alias = aws_kms_alias.storage_customer_managed_key_alias.name + } + use_cases = ["STORAGE"] +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/metastore.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/metastore.tf new file mode 100644 index 00000000..25acdc6a --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/metastore.tf @@ -0,0 +1,8 @@ +resource "databricks_metastore" "this" { + count = var.deploy_metastore == "true" ? 1 : 0 + provider = databricks.mws + owner = var.metastore_admin_group_name + name = "${var.region}-uc-metastore" + region = var.region + force_destroy = true +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/log_delivery.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/log_delivery.tf new file mode 100644 index 00000000..6ebe17ab --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/log_delivery.tf @@ -0,0 +1,107 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } +} + +resource "aws_s3_bucket" "logdelivery" { + bucket = "${var.resource_prefix}-logdelivery" + force_destroy = true + tags = merge(var.tags, { + Name = "${var.resource_prefix}-logdelivery" + }) +} + +resource "aws_s3_bucket_acl" "public_storage" { + bucket = aws_s3_bucket.logdelivery.id + acl = 
"private" + depends_on = [aws_s3_bucket_ownership_controls.public_storage] +} + +# Resource to avoid error "AccessControlListNotSupported: The bucket does not allow ACLs" +resource "aws_s3_bucket_ownership_controls" "public_storage" { + bucket = aws_s3_bucket.logdelivery.id + rule { + object_ownership = "ObjectWriter" + } +} + +resource "aws_s3_bucket_public_access_block" "logdelivery" { + bucket = aws_s3_bucket.logdelivery.id + ignore_public_acls = true +} + +data "databricks_aws_assume_role_policy" "logdelivery" { + external_id = var.databricks_account_id + for_log_delivery = true +} + +resource "aws_s3_bucket_versioning" "logdelivery_versioning" { + bucket = aws_s3_bucket.logdelivery.id + versioning_configuration { + status = "Disabled" + } +} + +resource "aws_iam_role" "logdelivery" { + name = "${var.resource_prefix}-logdelivery" + description = "(${var.resource_prefix}) UsageDelivery role" + assume_role_policy = data.databricks_aws_assume_role_policy.logdelivery.json + tags = var.tags +} + +data "databricks_aws_bucket_policy" "logdelivery" { + full_access_role = aws_iam_role.logdelivery.arn + bucket = aws_s3_bucket.logdelivery.bucket +} + +resource "aws_s3_bucket_policy" "logdelivery" { + bucket = aws_s3_bucket.logdelivery.id + policy = data.databricks_aws_bucket_policy.logdelivery.json +} + +resource "time_sleep" "wait_logdelivery" { + depends_on = [ + aws_iam_role.logdelivery + ] + create_duration = "10s" +} + +resource "databricks_mws_credentials" "log_writer" { + credentials_name = "${var.resource_prefix}-Usage-Delivery" + role_arn = aws_iam_role.logdelivery.arn + depends_on = [ + time_sleep.wait_logdelivery + ] +} + +resource "databricks_mws_storage_configurations" "log_bucket" { + account_id = var.databricks_account_id + storage_configuration_name = "${var.resource_prefix}-Usage-Logs" + bucket_name = aws_s3_bucket.logdelivery.bucket +} + +resource "databricks_mws_log_delivery" "usage_logs" { + account_id = var.databricks_account_id + credentials_id = 
databricks_mws_credentials.log_writer.credentials_id + storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id + delivery_path_prefix = "billable-usage" + config_name = "${var.resource_prefix}-Usage-Logs" + log_type = "BILLABLE_USAGE" + output_format = "CSV" +} + +resource "databricks_mws_log_delivery" "audit_logs" { + account_id = var.databricks_account_id + credentials_id = databricks_mws_credentials.log_writer.credentials_id + storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id + delivery_path_prefix = "audit-logs" + config_name = "${var.resource_prefix}-Audit-Logs" + log_type = "AUDIT_LOGS" + output_format = "JSON" +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/outputs.tf new file mode 100644 index 00000000..f18f37c3 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/outputs.tf @@ -0,0 +1,3 @@ +output "logdelivery_s3_bucket_name" { + value = aws_s3_bucket.logdelivery.bucket +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/variables.tf new file mode 100644 index 00000000..48b5182e --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/account_log_delivery/variables.tf @@ -0,0 +1,15 @@ +variable "resource_prefix" { + description = "Prefix for all resources" + type = string +} + +variable "tags" { + description = "Tags for all resources" + type = map(string) +} + +variable "databricks_account_id" { + description = "Databricks account ID" + type = string +} + diff --git 
a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/main.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/main.tf new file mode 100644 index 00000000..c000d9a7 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/main.tf @@ -0,0 +1,191 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + } +} + +data "aws_availability_zones" "available" { + state = "available" +} + +locals { + azlookup = slice(data.aws_availability_zones.available.names, 0, var.number_of_azs) +} + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + name = var.vpc_name + cidr = var.vpc_cidr + + azs = local.azlookup + private_subnet_names = var.private_subnet_names + private_subnets = var.private_subnets + public_subnet_names = var.public_subnet_names + public_subnets = var.public_subnets + intra_subnet_names = var.intra_subnet_names + intra_subnets = var.intra_subnets + + enable_dns_hostnames = true + enable_nat_gateway = true + single_nat_gateway = false + one_nat_gateway_per_az = true + create_igw = true + + tags = { + Environment = "${var.resource_prefix}-env" + } +} + +resource "aws_security_group" "sg" { + count = var.workspace_number + name = "${var.resource_prefix}-workspace-sg-${count.index}" + vpc_id = module.vpc.vpc_id + depends_on = [module.vpc] + + dynamic "ingress" { + for_each = ["tcp", "udp"] + content { + description = "Databricks - Workspace SG - Internode Communication" + from_port = 0 + to_port = 65535 + protocol = ingress.value + self = true + } + } + + dynamic "egress" { + for_each = ["tcp", "udp"] + content { + description = "Databricks - Workspace SG - Internode Communication" + from_port = 0 + to_port = 65535 + protocol = egress.value + self = true + } + } + + dynamic "egress" { + for_each = var.sg_egress_ports + content { + description = "Databricks - Workspace SG - REST (443), 
Secure Cluster Connectivity (2443/6666), Future Extendability (8443-8451)" + from_port = egress.value + to_port = egress.value + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + } + + tags = { + Name = "${var.resource_prefix}-workspace-sg" + Project = var.resource_prefix + } +} + + +resource "aws_security_group" "privatelink" { + vpc_id = module.vpc.vpc_id + + ingress { + description = "Databricks - PrivateLink Endpoint SG - REST API" + from_port = 443 + to_port = 443 + protocol = "tcp" + security_groups = [for i, sg in aws_security_group.sg : sg.id] + } + + ingress { + description = "Databricks - PrivateLink Endpoint SG - Secure Cluster Connectivity" + from_port = 6666 + to_port = 6666 + protocol = "tcp" + security_groups = [for i, sg in aws_security_group.sg : sg.id] + } + + ingress { + description = "Databricks - PrivateLink Endpoint SG - Secure Cluster Connectivity - Compliance Security Profile" + from_port = 2443 + to_port = 2443 + protocol = "tcp" + security_groups = [for i, sg in aws_security_group.sg : sg.id] + } + + ingress { + description = "Databricks - PrivateLink Endpoint SG - Future Extendability" + from_port = 8443 + to_port = 8451 + protocol = "tcp" + security_groups = [for i, sg in aws_security_group.sg : sg.id] + } + + tags = { + Name = "${var.resource_prefix}-private-link-sg", + Project = var.resource_prefix + } + depends_on = [aws_security_group.sg] +} + +module "vpc_endpoints" { + source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints" + vpc_id = module.vpc.vpc_id + security_group_ids = [aws_security_group.privatelink.id] + + endpoints = { + s3 = { + service = "s3" + service_type = "Gateway" + route_table_ids = module.vpc.private_route_table_ids + tags = { + Name = "${var.resource_prefix}-s3-vpc-endpoint" + Project = var.resource_prefix + } + }, + sts = { + service = "sts" + private_dns_enabled = true + subnet_ids = module.vpc.intra_subnets + tags = { + Name = "${var.resource_prefix}-sts-vpc-endpoint" + Project = var.resource_prefix 
+ } + }, + kinesis-streams = { + service = "kinesis-streams" + private_dns_enabled = true + subnet_ids = module.vpc.intra_subnets + tags = { + Name = "${var.resource_prefix}-kinesis-vpc-endpoint" + Project = var.resource_prefix + } + } + } +} + +# Databricks REST endpoint +resource "aws_vpc_endpoint" "backend_rest" { + vpc_id = module.vpc.vpc_id + service_name = var.workspace + vpc_endpoint_type = "Interface" + security_group_ids = [aws_security_group.privatelink.id] + subnet_ids = module.vpc.intra_subnets + private_dns_enabled = true + tags = { + Name = "${var.resource_prefix}-databricks-backend-rest" + Project = var.resource_prefix + } +} + +# Databricks SCC endpoint +resource "aws_vpc_endpoint" "backend_relay" { + vpc_id = module.vpc.vpc_id + service_name = var.scc_relay + vpc_endpoint_type = "Interface" + security_group_ids = [aws_security_group.privatelink.id] + subnet_ids = module.vpc.intra_subnets + private_dns_enabled = true + tags = { + Name = "${var.resource_prefix}-databricks-backend-relay" + Project = var.resource_prefix + } +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/outputs.tf new file mode 100644 index 00000000..6829e148 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/outputs.tf @@ -0,0 +1,23 @@ +output "vpc_id" { + value = module.vpc.vpc_id +} + +output "private_subnets" { + value = module.vpc.private_subnets +} + +output "workspace_security_group_ids" { + value = aws_security_group.sg[*].id +} + +output "privatelink_security_group_ids" { + value = aws_security_group.privatelink.id +} + +output "workspace_endpoint_id" { + value = aws_vpc_endpoint.backend_rest.id +} + +output "scc_relay_endpoint_id" { + value = aws_vpc_endpoint.backend_relay.id +} diff --git 
a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/variables.tf new file mode 100644 index 00000000..06e31cd8 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/aws_network/variables.tf @@ -0,0 +1,63 @@ +variable "region" { + type = string +} + +variable "vpc_name" { + type = string +} + +variable "vpc_cidr" { + type = string +} + +variable "workspace_number" { + type = number + description = "How many workspaces to create in this segment in the same VPC" +} + +variable "number_of_azs" { + type = number + description = "Used in vpc module, how many AZs to create subnets in" +} + +variable "private_subnets" { + type = list(string) + description = "Used in vpc module, list of private subnets to create, each workspace will have at least 2 subnets from different AZs" +} + +variable "private_subnet_names" { + type = list(string) +} + +variable "public_subnets" { + type = list(string) +} + +variable "public_subnet_names" { + type = list(string) +} + +variable "intra_subnets" { + type = list(string) +} + +variable "intra_subnet_names" { + type = list(string) +} + +variable "resource_prefix" { + type = string +} + +variable "sg_egress_ports" { + description = "List of egress ports for security groups." 
+ type = list(string) +} + +variable "scc_relay" { + type = string +} + +variable "workspace" { + type = string +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/main.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/main.tf new file mode 100644 index 00000000..05a39a9e --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/main.tf @@ -0,0 +1,268 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + time = { + source = "hashicorp/time" + } + } +} + +data "databricks_aws_assume_role_policy" "this" { + external_id = var.databricks_account_id +} + +// New cross-account iam role for workspace deployment +resource "aws_iam_role" "cross_account_role" { + name = "${var.resource_prefix}-crossaccount" + assume_role_policy = data.databricks_aws_assume_role_policy.this.json +} + +data "databricks_aws_crossaccount_policy" "this" { + policy_type = "customer" +} + +# a workaround using sleep to wait for role to be created +resource "time_sleep" "wait" { + depends_on = [ + aws_iam_role.cross_account_role + ] + create_duration = "10s" +} + +resource "databricks_mws_credentials" "this" { + # account_id should not be specified in mws credentials + role_arn = aws_iam_role.cross_account_role.arn + credentials_name = "${var.resource_prefix}-creds" + depends_on = [aws_iam_role_policy.cross_account, time_sleep.wait] +} + + +resource "aws_iam_role_policy" "cross_account" { + name = "${var.resource_prefix}-crossaccount-policy" + role = aws_iam_role.cross_account_role.id + policy = jsonencode({ + "Version" : "2012-10-17", + "Statement" : [ + { + "Sid" : "NonResourceBasedPermissions", + "Effect" : "Allow", + "Action" : [ + "ec2:CancelSpotInstanceRequests", + "ec2:DescribeAvailabilityZones", 
"ec2:DescribeIamInstanceProfileAssociations", + "ec2:DescribeInstanceStatus", + "ec2:DescribeInstances", + "ec2:DescribeInternetGateways", + "ec2:DescribeNatGateways", + "ec2:DescribeNetworkAcls", + "ec2:DescribePrefixLists", + "ec2:DescribeReservedInstancesOfferings", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSpotInstanceRequests", + "ec2:DescribeSpotPriceHistory", + "ec2:DescribeSubnets", + "ec2:DescribeVolumes", + "ec2:DescribeVpcAttribute", + "ec2:DescribeVpcs", + "ec2:CreateTags", + "ec2:DeleteTags", + "ec2:RequestSpotInstances" + ], + "Resource" : [ + "*" + ] + }, + { + "Sid" : "FleetPermissions", + "Effect" : "Allow", + "Action" : [ + "ec2:DescribeFleetHistory", + "ec2:ModifyFleet", + "ec2:DeleteFleets", + "ec2:DescribeFleetInstances", + "ec2:DescribeFleets", + "ec2:CreateFleet", + "ec2:DeleteLaunchTemplate", + "ec2:GetLaunchTemplateData", + "ec2:CreateLaunchTemplate", + "ec2:DescribeLaunchTemplates", + "ec2:DescribeLaunchTemplateVersions", + "ec2:ModifyLaunchTemplate", + "ec2:DeleteLaunchTemplateVersions", + "ec2:CreateLaunchTemplateVersion", + "ec2:AssignPrivateIpAddresses", + "ec2:GetSpotPlacementScores" + ], + "Resource" : [ + "*" + ] + }, + { + "Sid" : "InstancePoolsSupport", + "Effect" : "Allow", + "Action" : [ + "ec2:AssociateIamInstanceProfile", + "ec2:DisassociateIamInstanceProfile", + "ec2:ReplaceIamInstanceProfileAssociation" + ], + "Resource" : "arn:aws:ec2:${var.region}:${var.aws_account_id}:instance/*", + "Condition" : { + "StringEquals" : { + "ec2:ResourceTag/Vendor" : "Databricks" + } + } + }, + { + "Sid" : "AllowEc2RunInstancePerTag", + "Effect" : "Allow", + "Action" : "ec2:RunInstances", + "Resource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:volume/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:instance/*" + ], + "Condition" : { + "StringEquals" : { + "aws:RequestTag/Vendor" : "Databricks" + } } + }, + { + "Sid" : "AllowEc2RunInstancePerVPCid", + "Effect" : "Allow", + "Action" : 
"ec2:RunInstances", + "Resource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:network-interface/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:subnet/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:security-group/*" + ], + "Condition" : { + "StringEquals" : { + "ec2:vpc" : "arn:aws:ec2:${var.region}:${var.aws_account_id}:vpc/${var.vpc_id}" + } + } + }, + { + "Sid" : "AllowEc2RunInstanceOtherResources", + "Effect" : "Allow", + "Action" : "ec2:RunInstances", + "NotResource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:network-interface/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:subnet/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:security-group/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:volume/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:instance/*" + ] + }, + { + "Sid" : "DatabricksSuppliedImages", + "Effect" : "Deny", + "Action" : "ec2:RunInstances", + "Resource" : [ + "arn:aws:ec2:*:*:image/*" + ], + "Condition" : { + "StringNotEquals" : { + "ec2:Owner" : "601306020600" + } + } + }, + { + "Sid" : "EC2TerminateInstancesTag", + "Effect" : "Allow", + "Action" : [ + "ec2:TerminateInstances" + ], + "Resource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:instance/*" + ], + "Condition" : { + "StringEquals" : { + "ec2:ResourceTag/Vendor" : "Databricks" + } + } + }, + { + "Sid" : "EC2AttachDetachVolumeTag", + "Effect" : "Allow", + "Action" : [ + "ec2:AttachVolume", + "ec2:DetachVolume" + ], + "Resource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:instance/*", + "arn:aws:ec2:${var.region}:${var.aws_account_id}:volume/*" + ], + "Condition" : { + "StringEquals" : { + "ec2:ResourceTag/Vendor" : "Databricks" + } + } + }, + { + "Sid" : "EC2CreateVolumeByTag", + "Effect" : "Allow", + "Action" : [ + "ec2:CreateVolume" + ], + "Resource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:volume/*" + ], + "Condition" : { + "StringEquals" : { + "aws:RequestTag/Vendor" : "Databricks" 
+ } + } + }, + { + "Sid" : "EC2DeleteVolumeByTag", + "Effect" : "Allow", + "Action" : [ + "ec2:DeleteVolume" + ], + "Resource" : [ + "arn:aws:ec2:${var.region}:${var.aws_account_id}:volume/*" + ], + "Condition" : { + "StringEquals" : { + "ec2:ResourceTag/Vendor" : "Databricks" + } + } + }, + { + "Effect" : "Allow", + "Action" : [ + "iam:CreateServiceLinkedRole", + "iam:PutRolePolicy" + ], + "Resource" : "arn:aws:iam::*:role/aws-service-role/spot.amazonaws.com/AWSServiceRoleForEC2Spot", + "Condition" : { + "StringLike" : { + "iam:AWSServiceName" : "spot.amazonaws.com" + } + } + }, + { + "Sid" : "VpcNonresourceSpecificActions", + "Effect" : "Allow", + "Action" : [ + "ec2:AuthorizeSecurityGroupEgress", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupEgress", + "ec2:RevokeSecurityGroupIngress" + ], + "Resource" : "arn:aws:ec2:${var.region}:${var.aws_account_id}:security-group/${var.security_group_id}", + "Condition" : { + "StringEquals" : { + "ec2:vpc" : "arn:aws:ec2:${var.region}:${var.aws_account_id}:vpc/${var.vpc_id}" + } + } + } + ] + } + ) +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/outputs.tf new file mode 100644 index 00000000..7d136201 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/outputs.tf @@ -0,0 +1,8 @@ +output "storage_credential_id" { + value = databricks_mws_credentials.this.credentials_id +} + +output "cross_account_role_arn" { + value = aws_iam_role.cross_account_role.arn +} + diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/variables.tf new file mode 100644 index 00000000..24d1efaa --- 
/dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_credential/variables.tf @@ -0,0 +1,29 @@ +variable "region" { + type = string + description = "AWS region" +} + +variable "aws_account_id" { + type = string + description = "AWS account ID" +} + +variable "databricks_account_id" { + type = string + description = "Databricks account ID" +} + +variable "resource_prefix" { + type = string + description = "Resource prefix" +} + +variable "vpc_id" { + type = string + description = "VPC ID" +} + +variable "security_group_id" { + type = string + description = "Security group ID" +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/main.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/main.tf new file mode 100644 index 00000000..40ef7df0 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/main.tf @@ -0,0 +1,24 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } +} + + +resource "databricks_mws_networks" "mwsnetwork" { + account_id = var.databricks_account_id + network_name = "${var.resource_prefix}-network" + security_group_ids = [var.security_group_id] + subnet_ids = var.private_subnet_ids + vpc_id = var.vpc_id + + vpc_endpoints { + rest_api = [var.workspace_endpoint_id] + dataplane_relay = [var.scc_relay_endpoint_id] + } +} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/outputs.tf similarity index 100% rename from examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/outputs.tf rename to 
examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/outputs.tf diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/variables.tf new file mode 100644 index 00000000..ca9ca3a2 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_network/variables.tf @@ -0,0 +1,37 @@ +variable "resource_prefix" { + type = string +} + +variable "databricks_account_id" { + type = string +} + +variable "region" { + type = string + description = "AWS region" +} + +variable "vpc_id" { + type = string + description = "VPC ID" +} + +variable "private_subnet_ids" { + type = list(string) + description = "Private subnet IDs, e.g. [subnet-0123456789abcdefg, subnet-0123456789abcdefh]" +} + +variable "security_group_id" { + type = string + description = "Security group ID" +} + +variable "workspace_endpoint_id" { + type = string + description = "Workspace endpoint ID" +} + +variable "scc_relay_endpoint_id" { + type = string + description = "SCC relay endpoint ID" +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/main.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/main.tf new file mode 100644 index 00000000..a7ccbb51 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/main.tf @@ -0,0 +1,73 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } +} + +resource "aws_s3_bucket" "root_storage_bucket" { + bucket = "${var.resource_prefix}-rootbucket" + force_destroy = true + tags = { + Name = "${var.resource_prefix}-rootbucket" + } +} 
+ +resource "aws_s3_bucket_ownership_controls" "example" { + bucket = aws_s3_bucket.root_storage_bucket.id + rule { + object_ownership = "BucketOwnerPreferred" + } +} + +resource "aws_s3_bucket_acl" "example" { + depends_on = [aws_s3_bucket_ownership_controls.example] + bucket = aws_s3_bucket.root_storage_bucket.id + acl = "private" +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "root_storage_bucket" { + bucket = aws_s3_bucket.root_storage_bucket.bucket + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} + +resource "aws_s3_bucket_public_access_block" "root_storage_bucket" { + bucket = aws_s3_bucket.root_storage_bucket.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true + depends_on = [aws_s3_bucket.root_storage_bucket] +} + +data "databricks_aws_bucket_policy" "this" { + bucket = aws_s3_bucket.root_storage_bucket.bucket +} + +resource "aws_s3_bucket_policy" "root_bucket_policy" { + bucket = aws_s3_bucket.root_storage_bucket.id + policy = data.databricks_aws_bucket_policy.this.json + depends_on = [aws_s3_bucket_public_access_block.root_storage_bucket] +} + +resource "aws_s3_bucket_versioning" "root_bucket_versioning" { + bucket = aws_s3_bucket.root_storage_bucket.id + versioning_configuration { + status = "Disabled" + } +} + +resource "databricks_mws_storage_configurations" "this" { + account_id = var.databricks_account_id + bucket_name = aws_s3_bucket.root_storage_bucket.bucket + storage_configuration_name = "${var.resource_prefix}-storage" +} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/outputs.tf similarity index 56% rename from examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/outputs.tf rename to 
examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/outputs.tf index b60394ff..4d4f4b7b 100644 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/outputs.tf +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/outputs.tf @@ -1,3 +1,7 @@ output "storage_configuration_id" { value = databricks_mws_storage_configurations.this.storage_configuration_id } + +output "bucket_name" { + value = databricks_mws_storage_configurations.this.bucket_name +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/variables.tf new file mode 100644 index 00000000..27c96e0e --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/modules/workspace_root_storage/variables.tf @@ -0,0 +1,9 @@ +variable "resource_prefix" { + type = string + description = "Resource prefix" +} + +variable "databricks_account_id" { + type = string + description = "Databricks account ID" +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/outputs.tf new file mode 100644 index 00000000..d7ce08d1 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/outputs.tf @@ -0,0 +1,23 @@ +output "cross_account_role_arn" { + value = [for i in module.workspace_credential : i.cross_account_role_arn] +} + +output "workspace_security_group_ids" { + value = module.aws_network.workspace_security_group_ids +} + +output "privatelink_security_group_ids" { + value = module.aws_network.privatelink_security_group_ids +} + +output "databricks_host" { + value = 
databricks_mws_workspaces.workspaces_collection[*].workspace_url +} + +output "workspace_ids" { + value = databricks_mws_workspaces.workspaces_collection[*].workspace_id +} + +output "metastore_id" { + value = var.deploy_metastore == "true" ? databricks_metastore.this[0].metastore_id : null +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/providers.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/providers.tf new file mode 100644 index 00000000..b02a554a --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + configuration_aliases = [databricks.mws] + } + time = { + source = "hashicorp/time" + } + } +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/variables.tf new file mode 100644 index 00000000..e026f084 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/variables.tf @@ -0,0 +1,116 @@ +variable "databricks_account_id" { + type = string +} + +variable "aws_account_id" { + type = string +} + +variable "client_id" { + type = string +} + +variable "client_secret" { + type = string +} + +variable "region" { + type = string + default = "ap-southeast-1" +} + +variable "resource_prefix" { + type = string + default = "hao-tf" +} + +variable "vpc_name" { + type = string + default = "hao-demo-vpc1" +} + +variable "vpc_cidr" { + type = string + default = "10.0.0.0/16" +} + +variable "workspace_number" { + type = number + default = 2 + description = "determins how many workspaces security groups will be created from aws_network module, each workspace will have one security group" +} + +variable "number_of_azs" { + type = number + 
description = "Used in vpc module, how many AZs to create subnets in" +} + +variable "deploy_metastore" { + type = string + default = "false" + description = "deploy metastore" +} + +variable "existing_metastore_id" { + type = string + description = "metastore id if it's already created" +} + +variable "metastore_admin_group_name" { + type = string + description = "metastore admin group name" +} + +variable "deploy_log_delivery" { + type = string + description = "deploy log delivery for the environment, using dedicated s3 bucket and role" + default = "false" +} + +variable "private_subnets" { + type = list(string) + default = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24", "10.0.4.0/24"] +} + +variable "private_subnet_names" { + type = list(string) + default = ["private-subnet-1", "private-subnet-2", "private-subnet-3", "private-subnet-4"] +} + +variable "public_subnets" { + type = list(string) + default = ["10.0.101.0/24", "10.0.102.0/24"] +} + +variable "public_subnet_names" { + type = list(string) + default = ["public-subnet-1", "public-subnet-2"] +} + +variable "intra_subnets" { + type = list(string) + default = ["10.0.103.0/27", "10.0.104.0/27"] +} + +variable "intra_subnet_names" { + type = list(string) + default = ["privatelink-subnet-1", "privatelink-subnet-2"] +} + +variable "sg_egress_ports" { + description = "List of egress ports for security groups." 
+ type = list(string) + default = [443, 2443, 3306, 6666, 8443, 8444, 8445, 8446, 8447, 8448, 8449, 8450, 8451] +} + +variable "scc_relay" { + type = string +} + +variable "workspace" { + type = string +} + +variable "tags" { + default = {} +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/workspace.tf b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/workspace.tf new file mode 100644 index 00000000..364ec8e5 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_account_tf_pipeline/workspace.tf @@ -0,0 +1,150 @@ +resource "random_string" "naming" { + special = false + upper = false + length = 6 +} + +locals { + prefix = "demo-${random_string.naming.result}" +} + +# aws_network module creates the VPC, subnets, and security groups +module "aws_network" { + providers = { + aws = aws + } + source = "./modules/aws_network" + region = var.region + vpc_name = var.vpc_name + vpc_cidr = var.vpc_cidr + workspace_number = var.workspace_number + number_of_azs = var.number_of_azs + private_subnets = var.private_subnets + private_subnet_names = var.private_subnet_names + public_subnets = var.public_subnets + public_subnet_names = var.public_subnet_names + intra_subnets = var.intra_subnets + intra_subnet_names = var.intra_subnet_names + resource_prefix = var.resource_prefix + sg_egress_ports = var.sg_egress_ports + scc_relay = var.scc_relay + workspace = var.workspace +} + +module "workspace_credential" { + count = var.workspace_number + + providers = { + aws = aws + databricks = databricks.mws + time = time + } + + source = "./modules/workspace_credential" + region = var.region + aws_account_id = var.aws_account_id + databricks_account_id = var.databricks_account_id + resource_prefix = "${local.prefix}-${count.index}" + vpc_id = module.aws_network.vpc_id + security_group_id = module.aws_network.workspace_security_group_ids[count.index] +} + +module "workspace_root_storage" { + count = 
var.workspace_number + + providers = { + aws = aws + databricks = databricks.mws + } + + source = "./modules/workspace_root_storage" + resource_prefix = "${local.prefix}-${count.index}" + databricks_account_id = var.databricks_account_id +} + +# Backend REST VPC Endpoint Configuration +resource "databricks_mws_vpc_endpoint" "backend_rest" { + provider = databricks.mws + account_id = var.databricks_account_id + aws_vpc_endpoint_id = module.aws_network.workspace_endpoint_id + vpc_endpoint_name = "${var.resource_prefix}-vpce-backend" + region = var.region +} + +# Backend Rest VPC Endpoint Configuration +resource "databricks_mws_vpc_endpoint" "backend_relay" { + provider = databricks.mws + account_id = var.databricks_account_id + aws_vpc_endpoint_id = module.aws_network.scc_relay_endpoint_id + vpc_endpoint_name = "${var.resource_prefix}-vpce-relay" + region = var.region +} + +// Private Access Setting Configuration, shared by multiple workspaces (or you can build multiple PAS for multiple workspaces) +resource "databricks_mws_private_access_settings" "pas" { + provider = databricks.mws + private_access_settings_name = "${var.resource_prefix}-PAS" + region = var.region + public_access_enabled = true + private_access_level = "ACCOUNT" +} + +module "workspace_network" { + count = var.workspace_number + + providers = { + aws = aws + databricks = databricks.mws + } + + source = "./modules/workspace_network" + resource_prefix = "${local.prefix}-${count.index}" + databricks_account_id = var.databricks_account_id + region = var.region + vpc_id = module.aws_network.vpc_id + security_group_id = module.aws_network.workspace_security_group_ids[count.index] + private_subnet_ids = [ + module.aws_network.private_subnets[count.index * 2], + module.aws_network.private_subnets[count.index * 2 + 1] + ] # each workspace will have 2 private subnets + workspace_endpoint_id = databricks_mws_vpc_endpoint.backend_rest.vpc_endpoint_id + scc_relay_endpoint_id = 
databricks_mws_vpc_endpoint.backend_relay.vpc_endpoint_id +} + +# Create workspaces +resource "databricks_mws_workspaces" "workspaces_collection" { + count = var.workspace_number + provider = databricks.mws + account_id = var.databricks_account_id + aws_region = var.region + workspace_name = "${local.prefix}-${count.index}" + credentials_id = module.workspace_credential[count.index].storage_credential_id + storage_configuration_id = module.workspace_root_storage[count.index].storage_configuration_id + network_id = module.workspace_network[count.index].network_id + private_access_settings_id = databricks_mws_private_access_settings.pas.private_access_settings_id + managed_services_customer_managed_key_id = databricks_mws_customer_managed_keys.managed_services.customer_managed_key_id + storage_customer_managed_key_id = databricks_mws_customer_managed_keys.storage.customer_managed_key_id + + token { + comment = "Terraform" + } +} + +resource "databricks_metastore_assignment" "metastore_assignments" { + count = var.deploy_metastore == "true" || var.existing_metastore_id != null ? var.workspace_number : 0 + + provider = databricks.mws + workspace_id = databricks_mws_workspaces.workspaces_collection[count.index].workspace_id + metastore_id = var.deploy_metastore == "true" ? databricks_metastore.this[0].id : var.existing_metastore_id +} + +module "account_log_delivery" { + count = var.deploy_log_delivery == "true" ? 
1 : 0 + providers = { + databricks = databricks.mws + } + source = "./modules/account_log_delivery" + resource_prefix = var.resource_prefix + tags = var.tags + databricks_account_id = var.databricks_account_id +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/clusters.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/clusters.tf new file mode 100644 index 00000000..9cc096ce --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/clusters.tf @@ -0,0 +1,37 @@ +# Create a cluster for each user +resource "databricks_cluster" "dedicated_user_clusters" { + provider = databricks.workspace + for_each = toset(var.databricks_users) + cluster_name = "${each.value}-dedicated-cluster" + + spark_version = "15.4.x-scala2.12" + node_type_id = "r5.xlarge" + autotermination_minutes = 30 + is_single_node = true + kind = "CLASSIC_PREVIEW" + data_security_mode = "SINGLE_USER" + enable_elastic_disk = true + + custom_tags = { + Owner = each.value + } +} + +# Separate permissions resource but with lifecycle rule to prevent cluster restarts due to permission changes +resource "databricks_permissions" "cluster_permissions" { + provider = databricks.workspace + for_each = databricks_cluster.dedicated_user_clusters + + cluster_id = each.value.id + + access_control { + user_name = each.key + permission_level = "CAN_RESTART" + } + + lifecycle { + ignore_changes = [ + cluster_id + ] + } +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/configs/structured-output.yaml b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/configs/structured-output.yaml new file mode 100755 index 00000000..736421e2 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/configs/structured-output.yaml @@ -0,0 +1,18 @@ +- "catalog_name": "demo-lm3w-catalog01" + "external_location_name": 
"demo-lm3w-external-location-001" + "iam_role_name": "demo-lm3w-iam-role-001" + "region": "ap-southeast-1" + "s3_bucket_name": "demo-lm3w-uc-bucket-001" + "storage_credential_name": "demo-lm3w-uc-credential-001" +- "catalog_name": "demo-lm3w-catalog02" + "external_location_name": "demo-lm3w-external-location-002" + "iam_role_name": "demo-lm3w-iam-role-002" + "region": "ap-southeast-1" + "s3_bucket_name": "demo-lm3w-uc-bucket-002" + "storage_credential_name": "demo-lm3w-uc-credential-002" +- "catalog_name": "demo-lm3w-catalog03" + "external_location_name": "demo-lm3w-external-location-003" + "iam_role_name": "demo-lm3w-iam-role-003" + "region": "ap-southeast-1" + "s3_bucket_name": "demo-lm3w-uc-bucket-003" + "storage_credential_name": "demo-lm3w-uc-credential-003" diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/main.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/main.tf new file mode 100644 index 00000000..4946e898 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/main.tf @@ -0,0 +1,52 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } +} + +provider "databricks" { + alias = "workspace" + host = var.databricks_host + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} + +provider "aws" { + region = var.region +} + +module "starter_catalogs" { + source = "./modules/uc_catalogs_init" + providers = { + databricks = databricks.workspace + aws = aws // using default aws provider + } + for_each = local.init_uc_catalogs + + catalog_name = lower(each.value.catalog_name) + s3_bucket_name = lower(each.value.s3_bucket_name) + iam_role_name = each.value.iam_role_name + storage_credential_name = each.value.storage_credential_name + external_location_name = each.value.external_location_name +} + +resource 
"random_string" "naming" { + special = false + upper = false + length = 6 +} + +locals { + prefix = "demo${random_string.naming.result}" + configs = yamldecode(file("${path.module}/configs/structured-output.yaml")) + + init_uc_catalogs = { + for cfg in local.configs : cfg.catalog_name => cfg + } +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/main.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/main.tf new file mode 100644 index 00000000..78b5929d --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/main.tf @@ -0,0 +1,103 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } +} + +resource "null_resource" "previous" {} + +// Wait to prevent race condition between IAM role and external location validation +resource "time_sleep" "wait_60_seconds" { + depends_on = [null_resource.previous] + create_duration = "60s" +} + +resource "aws_s3_bucket" "uc_demo" { + bucket = var.s3_bucket_name + force_destroy = true // delete all objects in the bucket before deleting the bucket +} + +resource "aws_s3_bucket_versioning" "uc_demo_versioning" { + bucket = aws_s3_bucket.uc_demo.id + versioning_configuration { + status = "Disabled" + } +} + +resource "aws_s3_bucket_public_access_block" "uc_demo" { + bucket = aws_s3_bucket.uc_demo.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true + depends_on = [aws_s3_bucket.uc_demo] +} + +data "aws_caller_identity" "current" {} + +data "databricks_aws_unity_catalog_assume_role_policy" "uc_policy" { + aws_account_id = data.aws_caller_identity.current.account_id + role_name = var.iam_role_name + external_id = databricks_storage_credential.uc_demo.aws_iam_role[0].external_id +} + +data 
"databricks_aws_unity_catalog_policy" "uc_demo" { + aws_account_id = data.aws_caller_identity.current.account_id + bucket_name = aws_s3_bucket.uc_demo.id + role_name = var.iam_role_name +} + +resource "aws_iam_policy" "external_data_access" { + policy = data.databricks_aws_unity_catalog_policy.uc_demo.json +} + +resource "aws_iam_role_policy_attachment" "policy_attachment" { + policy_arn = aws_iam_policy.external_data_access.arn + role = aws_iam_role.uc_demo.name +} + +resource "aws_iam_role" "uc_demo" { // create IAM role after storage credential is created + name = var.iam_role_name + assume_role_policy = data.databricks_aws_unity_catalog_assume_role_policy.uc_policy.json + depends_on = [databricks_storage_credential.uc_demo] +} + +resource "databricks_storage_credential" "uc_demo" { + name = var.storage_credential_name + //cannot reference aws_iam_role directly, as it will create circular dependency + aws_iam_role { + role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.iam_role_name}" + } + comment = "Managed by Terraform" + force_destroy = true + isolation_mode = "ISOLATION_MODE_ISOLATED" +} + +resource "databricks_external_location" "uc_demo" { + name = var.external_location_name + url = "s3://${aws_s3_bucket.uc_demo.bucket}" + credential_name = databricks_storage_credential.uc_demo.name + comment = "Managed by Terraform" + isolation_mode = "ISOLATION_MODE_ISOLATED" + skip_validation = true + depends_on = [aws_iam_role_policy_attachment.policy_attachment, time_sleep.wait_60_seconds] +} + +resource "databricks_catalog" "uc_demo" { + name = var.catalog_name + storage_root = "s3://${aws_s3_bucket.uc_demo.bucket}/${var.catalog_name}/" + isolation_mode = "ISOLATED" + comment = "Default isolated catalog dedicated for this workspace, managed by Terraform" + depends_on = [databricks_external_location.uc_demo] +} + +resource "databricks_schema" "uc_demo" { + catalog_name = databricks_catalog.uc_demo.id + name = "default" + comment = 
"Default schema for ${var.catalog_name}, managed by Terraform" +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/outputs.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/outputs.tf new file mode 100644 index 00000000..e96cd49f --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/outputs.tf @@ -0,0 +1,19 @@ +output "uc_bucket_name" { + value = aws_s3_bucket.uc_demo.bucket +} + +output "uc_iam_role_arn" { + value = aws_iam_role.uc_demo.arn +} + +output "uc_storage_credential_name" { + value = databricks_storage_credential.uc_demo.name +} + +output "uc_external_location_name" { + value = databricks_external_location.uc_demo.name +} + +output "uc_catalog_name" { + value = databricks_catalog.uc_demo.name +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/variables.tf new file mode 100644 index 00000000..8c279b59 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/modules/uc_catalogs_init/variables.tf @@ -0,0 +1,25 @@ +variable "s3_bucket_name" { + type = string + description = "Name of the S3 bucket." +} + +variable "iam_role_name" { + type = string + description = "IAM role name for Databricks Unity Catalog." +} + +variable "storage_credential_name" { + type = string + description = "Databricks Unity Catalog storage credential name." +} + +variable "external_location_name" { + type = string + description = "Databricks Unity Catalog external location name." +} + +variable "catalog_name" { + type = string + description = "Databricks Unity Catalog catalog name." 
+} + diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/prerequisite_tf_step/main.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/prerequisite_tf_step/main.tf new file mode 100644 index 00000000..b66ffc8b --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/prerequisite_tf_step/main.tf @@ -0,0 +1,65 @@ +terraform { + required_providers { + local = { + source = "hashicorp/local" + version = "~> 2.4" + } + random = { + source = "hashicorp/random" + version = "~> 3.5" + } + } +} + +resource "random_string" "naming" { + special = false + upper = false + length = 4 +} + +variable "customized_prefix" { + type = string + default = "demo" +} + +variable "region" { + type = string + default = "ap-southeast-1" +} + +locals { + prefix = var.customized_prefix == "" ? random_string.naming.result : "${var.customized_prefix}-${random_string.naming.result}" + config_items = [ + { + s3_bucket_name = "${local.prefix}-uc-bucket-001" + iam_role_name = "${local.prefix}-iam-role-001" + region = var.region + storage_credential_name = "${local.prefix}-uc-credential-001" + external_location_name = "${local.prefix}-external-location-001" + catalog_name = "${local.prefix}-catalog01" + }, + { + s3_bucket_name = "${local.prefix}-uc-bucket-002" + iam_role_name = "${local.prefix}-iam-role-002" + region = var.region + storage_credential_name = "${local.prefix}-uc-credential-002" + external_location_name = "${local.prefix}-external-location-002" + catalog_name = "${local.prefix}-catalog02" + }, + { + s3_bucket_name = "${local.prefix}-uc-bucket-003" + iam_role_name = "${local.prefix}-iam-role-003" + region = var.region + storage_credential_name = "${local.prefix}-uc-credential-003" + external_location_name = "${local.prefix}-external-location-003" + catalog_name = "${local.prefix}-catalog03" + }, + ] + + yaml_structure = local.config_items +} + +resource "local_file" 
"structured_yaml" { + filename = "../configs/structured-output.yaml" + content = yamlencode(local.yaml_structure) +} diff --git a/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/variables.tf b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/variables.tf new file mode 100644 index 00000000..6bf0a482 --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/databricks_workspace_tf_pipeline/variables.tf @@ -0,0 +1,25 @@ +variable "databricks_account_id" { + type = string +} + +variable "client_id" { + type = string +} + +variable "client_secret" { + type = string +} + +variable "databricks_host" { + type = string +} + +variable "region" { + type = string + default = "ap-southeast-1" +} + +variable "databricks_users" { + type = list(string) + description = "List of Databricks usernames who need dedicated single node clusters" +} diff --git a/examples/aws-databricks-modular-privatelink/iam.tf b/examples/aws-databricks-modular-privatelink/iam.tf deleted file mode 100644 index f6a85cf1..00000000 --- a/examples/aws-databricks-modular-privatelink/iam.tf +++ /dev/null @@ -1,44 +0,0 @@ -data "databricks_aws_assume_role_policy" "this" { - external_id = var.databricks_account_id -} - -resource "aws_iam_role" "cross_account_role" { - name = "${local.prefix}-crossaccount" - assume_role_policy = data.databricks_aws_assume_role_policy.this.json - tags = var.tags -} - -data "databricks_aws_crossaccount_policy" "this" { -} - -resource "aws_iam_role_policy" "this" { - name = "${local.prefix}-policy" - role = aws_iam_role.cross_account_role.id - policy = data.databricks_aws_crossaccount_policy.this.json -} - -# a walkaround using sleep to wait for role to be created -resource "time_sleep" "wait" { - depends_on = [ - aws_iam_role.cross_account_role - ] - create_duration = "20s" -} - -# Generate credentials to create and thereafter enter the Databricks workspace -resource "databricks_mws_credentials" "this" { - provider = 
databricks.mws - account_id = var.databricks_account_id - role_arn = aws_iam_role.cross_account_role.arn - credentials_name = "${local.prefix}-creds" - depends_on = [time_sleep.wait] -} - -output "policy" { - value = data.databricks_aws_crossaccount_policy.this.json -} -/* -output "assume_role_policy" { - value = data.databricks_aws_assume_role_policy.this.json -} -*/ diff --git a/examples/aws-databricks-modular-privatelink/identity_management.tf b/examples/aws-databricks-modular-privatelink/identity_management.tf new file mode 100644 index 00000000..baf8a87b --- /dev/null +++ b/examples/aws-databricks-modular-privatelink/identity_management.tf @@ -0,0 +1,98 @@ +# metastore admin group +resource "databricks_group" "metastore_admin_group" { + provider = databricks.mws + display_name = local.user_config.tf_admin_groups.metastore_admin_group.display_name +} + +// use the spn that was manually created +data "databricks_service_principal" "spn" { + provider = databricks.mws + application_id = var.client_id +} + +resource "databricks_group_member" "metastore_admin_group_member_spn" { + provider = databricks.mws + group_id = databricks_group.metastore_admin_group.id + member_id = data.databricks_service_principal.spn.id +} + +locals { + user_config = yamldecode(file("${path.module}/configs/account_users.yaml")) + metastore_admin_new_members = lookup(local.user_config.tf_admin_groups.metastore_admin_group.members, "new_metastore_admins", []) + metastore_admin_existing_members = lookup(local.user_config.tf_admin_groups.metastore_admin_group.members, "existing_metastore_admins", []) + + # Create flattened list of terraform-managed group memberships + terraform_group_memberships = flatten([ + for group_name, group in local.user_config.tf_non_admin_groups : [ + for username in group.members : { + group_name = group_name + user_name = username + } + ] + ]) +} + +####### users ####### +# Create new users +resource "databricks_user" "new_users" { + provider = databricks.mws + 
for_each = local.user_config.new_users + user_name = each.key + display_name = each.value.display_name +} + +# Import existing users that you want to interact with terraform, for example, assign them into groups via TF +data "databricks_user" "existing_users" { + provider = databricks.mws + for_each = toset(local.user_config.existing_users) + user_name = each.value +} + +####### metastore admin group ####### +# Add new metastore admins to metastore admin group +resource "databricks_group_member" "metastore_admin_new_members" { + provider = databricks.mws + for_each = toset(local.metastore_admin_new_members) + group_id = databricks_group.metastore_admin_group.id + member_id = databricks_user.new_users[each.value].id +} + +# Add existing metastore admins to metastore admin group +data "databricks_user" "metastore_admin_existing_users" { + provider = databricks.mws + for_each = toset(local.metastore_admin_existing_members) + user_name = each.value +} + +resource "databricks_group_member" "metastore_admin_existing_members" { + provider = databricks.mws + for_each = toset(local.metastore_admin_existing_members) + group_id = databricks_group.metastore_admin_group.id + member_id = data.databricks_user.metastore_admin_existing_users[each.value].id +} + +####### groups ####### +resource "databricks_group" "tf_non_admin_groups" { + provider = databricks.mws + for_each = local.user_config.tf_non_admin_groups + display_name = each.value.display_name +} +resource "databricks_group_member" "tf_group_members" { + provider = databricks.mws + for_each = { + for membership in local.terraform_group_memberships : + "${membership.group_name}-${membership.user_name}" => membership + } + group_id = databricks_group.tf_non_admin_groups[each.value.group_name].id + member_id = contains(keys(databricks_user.new_users), each.value.user_name) ? 
databricks_user.new_users[each.value.user_name].id : data.databricks_user.existing_users[each.value.user_name].id +} + +####### add users to workspace ####### +# Add all users from yaml file into desired workspace, below is example for env1 1 workspace +resource "databricks_mws_permission_assignment" "add_users_to_workspace" { + provider = databricks.mws + for_each = local.user_config.new_users + workspace_id = module.multiple_workspaces["env1"].workspace_ids[0] + principal_id = databricks_user.new_users[each.key].id + permissions = ["USER"] +} diff --git a/examples/aws-databricks-modular-privatelink/images/instance-profile.png b/examples/aws-databricks-modular-privatelink/images/instance-profile.png deleted file mode 100644 index 006f9880..00000000 Binary files a/examples/aws-databricks-modular-privatelink/images/instance-profile.png and /dev/null differ diff --git a/examples/aws-databricks-modular-privatelink/images/ip-access-list-block.png b/examples/aws-databricks-modular-privatelink/images/ip-access-list-block.png deleted file mode 100644 index 7389eb59..00000000 Binary files a/examples/aws-databricks-modular-privatelink/images/ip-access-list-block.png and /dev/null differ diff --git a/examples/aws-databricks-modular-privatelink/images/ip-access-lists-flow.png b/examples/aws-databricks-modular-privatelink/images/ip-access-lists-flow.png deleted file mode 100644 index 3e5cc49c..00000000 Binary files a/examples/aws-databricks-modular-privatelink/images/ip-access-lists-flow.png and /dev/null differ diff --git a/examples/aws-databricks-modular-privatelink/images/orphaned-resources.png b/examples/aws-databricks-modular-privatelink/images/orphaned-resources.png deleted file mode 100644 index 9803d5bb..00000000 Binary files a/examples/aws-databricks-modular-privatelink/images/orphaned-resources.png and /dev/null differ diff --git a/examples/aws-databricks-modular-privatelink/images/tf-remote-s3-backend.png 
b/examples/aws-databricks-modular-privatelink/images/tf-remote-s3-backend.png deleted file mode 100644 index 36dc8411..00000000 Binary files a/examples/aws-databricks-modular-privatelink/images/tf-remote-s3-backend.png and /dev/null differ diff --git a/examples/aws-databricks-modular-privatelink/instance_profile.tf b/examples/aws-databricks-modular-privatelink/instance_profile.tf deleted file mode 100644 index 153ceeb7..00000000 --- a/examples/aws-databricks-modular-privatelink/instance_profile.tf +++ /dev/null @@ -1,90 +0,0 @@ -// create a data s3 bucket for demo purposes -resource "aws_s3_bucket" "data_bucket" { - bucket = "data-bucket-for-test" // hard-coded value for demo only - acl = "private" - force_destroy = true -} - -resource "aws_iam_policy" "added_policy" { - name = "grant-specific-s3-policy" - description = "A test policy" - - policy = < yamldecode(file(env.config_file_path)) + } } -// for each VPC, you should create workspace_collection -module "workspace_collection" { - for_each = local.workspace_confs +# Single module block that creates multiple workspaces +module "multiple_workspaces" { + for_each = local.environments providers = { - databricks = databricks.mws - aws = aws + aws = aws + databricks.mws = databricks.mws + time = time } - source = "./modules/mws_workspace" + source = "./databricks_account_tf_pipeline" + aws_account_id = var.aws_account_id databricks_account_id = var.databricks_account_id - credentials_id = databricks_mws_credentials.this.credentials_id - prefix = each.value.prefix - region = each.value.region - workspace_name = each.value.workspace_name - tags = each.value.tags - existing_vpc_id = aws_vpc.mainvpc.id - nat_gateways_id = aws_nat_gateway.nat_gateways[0].id - security_group_ids = [aws_security_group.sg.id] - private_subnet_pair = [each.value.private_subnet_pair.subnet1_cidr, each.value.private_subnet_pair.subnet2_cidr] - workspace_storage_cmk = module.databricks_cmk.workspace_storage_cmk - managed_services_cmk = 
module.databricks_cmk.managed_services_cmk - root_bucket_name = each.value.root_bucket_name - relay_vpce_id = [databricks_mws_vpc_endpoint.relay.vpc_endpoint_id] - rest_vpce_id = [databricks_mws_vpc_endpoint.backend_rest_vpce.vpc_endpoint_id] - depends_on = [ - databricks_mws_vpc_endpoint.relay, - databricks_mws_vpc_endpoint.backend_rest_vpce - ] -} - -data "http" "my" { // check host machine public IP - url = "https://ifconfig.me" -} - -// save deployment info to local file for future configuration -resource "local_file" "deployment_information" { - for_each = local.workspace_confs - - content = jsonencode({ - "prefix" = "${local.workspace_confs[each.key].prefix}-${local.prefix}" - "workspace_url" = module.workspace_collection[each.key].workspace_url - "block_list" = "${local.workspace_confs[each.key].block_list}" - "allow_list" = "${concat(local.workspace_confs[each.key].allow_list, ["${data.http.my.body}/32"])}" - }) - filename = "./artifacts/${each.key}.json" + region = var.region + client_id = var.client_id + client_secret = var.client_secret + + # Use the values extracted from the YAML for this environment + vpc_name = local.config_files[each.key].vpc.name + vpc_cidr = local.config_files[each.key].vpc.cidr + workspace_number = local.config_files[each.key].workspace_number + number_of_azs = local.config_files[each.key].subnets.number_of_azs + + private_subnets = [for subnet in local.config_files[each.key].subnets.private : subnet.cidr] + private_subnet_names = [for subnet in local.config_files[each.key].subnets.private : subnet.name] + public_subnets = [for subnet in local.config_files[each.key].subnets.public : subnet.cidr] + public_subnet_names = [for subnet in local.config_files[each.key].subnets.public : subnet.name] + intra_subnets = [for subnet in local.config_files[each.key].subnets.intra : subnet.cidr] + intra_subnet_names = [for subnet in local.config_files[each.key].subnets.intra : subnet.name] + + scc_relay = local.config_files[each.key].scc_relay + 
workspace = local.config_files[each.key].workspace + + resource_prefix = local.config_files[each.key].resource_prefix + deploy_metastore = local.config_files[each.key].deploy_metastore + existing_metastore_id = lookup(local.config_files[each.key], "existing_metastore_id", null) + metastore_admin_group_name = lookup(local.config_files[each.key], "metastore_admin_group_name", null) + deploy_log_delivery = lookup(local.config_files[each.key], "deploy_log_delivery", false) + tags = local.config_files[each.key].tags } diff --git a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/main.tf b/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/main.tf deleted file mode 100644 index ca6675e6..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/main.tf +++ /dev/null @@ -1,23 +0,0 @@ -resource "aws_kms_key" "workspace_storage_cmk" { - policy = data.aws_iam_policy_document.databricks_storage_cmk.json - tags = { - Name = "${var.resource_prefix}-${var.region}-ws-cmk" - } -} - -resource "aws_kms_alias" "workspace_storage_cmk_alias" { - name_prefix = "alias/${var.resource_prefix}-workspace-storage" - target_key_id = aws_kms_key.workspace_storage_cmk.key_id -} - -resource "aws_kms_key" "managed_services_cmk" { - policy = data.aws_iam_policy_document.databricks_managed_services_cmk.json - tags = { - Name = "${var.resource_prefix}-${var.region}-ms-cmk" - } -} - -resource "aws_kms_alias" "managed_services_cmk_alias" { - name_prefix = "alias/${var.resource_prefix}-managed-services" - target_key_id = aws_kms_key.managed_services_cmk.key_id -} diff --git a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/outputs.tf b/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/outputs.tf deleted file mode 100644 index 7b8e2fcb..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/outputs.tf +++ /dev/null @@ -1,13 +0,0 @@ -output "workspace_storage_cmk" { - value = { - 
key_alias = aws_kms_alias.workspace_storage_cmk_alias.name - key_arn = aws_kms_key.workspace_storage_cmk.arn - } -} - -output "managed_services_cmk" { - value = { - key_alias = aws_kms_alias.managed_services_cmk_alias.name - key_arn = aws_kms_key.managed_services_cmk.arn - } -} diff --git a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/providers.tf b/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/providers.tf deleted file mode 100644 index f2702bf6..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/providers.tf +++ /dev/null @@ -1,7 +0,0 @@ -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - } - } -} diff --git a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/variables.tf b/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/variables.tf deleted file mode 100644 index 23172374..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/databricks_cmk/variables.tf +++ /dev/null @@ -1,15 +0,0 @@ -variable "resource_prefix" { - type = string -} - -variable "cross_account_role_arn" { - type = string -} - -variable "cmk_admin" { - type = string -} - -variable "region" { - type = string -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/README.md b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/README.md deleted file mode 100644 index a3b1cc98..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/README.md +++ /dev/null @@ -1,44 +0,0 @@ - -## Description - -This module `mws_workspace` creates an E2 Databricks workspace into an existing customer managed VPC; the module contains 2 sub-modules: `mws_network` and `mws_storage`, which are 2 abstracted layers as pre-requisite for the E2 workspace creation. 
- -## Providers - -| Name | Version | -| ---------------------------------------------------------------------- | ------- | -| [databricks](#provider\_databricks) | n/a | - -## Modules - -| Name | Source | Version | -| ---------------------------------------------------------------------------------- | --------------------- | ------- | -| [my\_mws\_network](#module\_my\_mws\_network) | ./modules/mws_network | n/a | -| [my\_root\_bucket](#module\_my\_root\_bucket) | ./modules/mws_storage | n/a | - -## Resources - -| Name | Type | -| ------------------------------------------------------------------------------------------------------------------------------------ | -------- | -| [databricks_mws_workspaces.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_workspaces) | resource | - -## Inputs - -| Name | Description | Type | Default | Required | -| ----------------------------------------------------------------------------------------------------- | ------------------ | -------------- | ------- | :------: | -| [credentials\_id](#input\_credentials\_id) | n/a | `string` | n/a | yes | -| [databricks\_account\_id](#input\_databricks\_account\_id) | n/a | `string` | n/a | yes | -| [existing\_vpc\_id](#input\_existing\_vpc\_id) | for network config | `string` | n/a | yes | -| [nat\_gateways\_id](#input\_nat\_gateways\_id) | n/a | `string` | n/a | yes | -| [prefix](#input\_prefix) | n/a | `string` | n/a | yes | -| [private\_subnet\_pair](#input\_private\_subnet\_pair) | n/a | `list(string)` | n/a | yes | -| [region](#input\_region) | n/a | `string` | n/a | yes | -| [security\_group\_ids](#input\_security\_group\_ids) | n/a | `list(string)` | n/a | yes | -| [workspace\_name](#input\_workspace\_name) | n/a | `string` | n/a | yes | - -## Outputs - -| Name | Description | -| ----------------------------------------------------------------------------- | ----------- | -| [workspace\_url](#output\_workspace\_url) | n/a | - \ No 
newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/main.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/main.tf deleted file mode 100644 index 0b782ef8..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/main.tf +++ /dev/null @@ -1,69 +0,0 @@ -module "my_mws_network" { - source = "./modules/mws_network" - databricks_account_id = var.databricks_account_id - aws_nat_gateway_id = var.nat_gateways_id - existing_vpc_id = var.existing_vpc_id - security_group_ids = var.security_group_ids - region = var.region - private_subnet_pair = var.private_subnet_pair - prefix = "${var.prefix}-network" - relay_vpce_id = var.relay_vpce_id - rest_vpce_id = var.rest_vpce_id - tags = var.tags -} - -module "my_root_bucket" { - source = "./modules/mws_storage" - databricks_account_id = var.databricks_account_id - region = var.region - root_bucket_name = var.root_bucket_name - tags = var.tags -} - -resource "databricks_mws_customer_managed_keys" "workspace_storage" { - account_id = var.databricks_account_id - aws_key_info { - key_arn = var.workspace_storage_cmk.key_arn - key_alias = var.workspace_storage_cmk.key_alias - } - use_cases = ["STORAGE"] -} - -resource "databricks_mws_customer_managed_keys" "managed_services" { - account_id = var.databricks_account_id - aws_key_info { - key_arn = var.managed_services_cmk.key_arn - key_alias = var.managed_services_cmk.key_alias - } - use_cases = ["MANAGED_SERVICES"] -} - - -resource "databricks_mws_private_access_settings" "pas" { - account_id = var.databricks_account_id - private_access_settings_name = "Private Access Settings for ${var.prefix}" - region = var.region - public_access_enabled = true - private_access_level = "ACCOUNT" // a fix for recent changes - 202209 -} - - -resource "databricks_mws_workspaces" "this" { - account_id = var.databricks_account_id - aws_region = var.region - workspace_name = var.workspace_name - 
private_access_settings_id = databricks_mws_private_access_settings.pas.private_access_settings_id - pricing_tier = "ENTERPRISE" - - # deployment_name = local.prefix - - credentials_id = var.credentials_id - storage_configuration_id = module.my_root_bucket.storage_configuration_id - network_id = module.my_mws_network.network_id - - # cmk - storage_customer_managed_key_id = databricks_mws_customer_managed_keys.workspace_storage.customer_managed_key_id - managed_services_customer_managed_key_id = databricks_mws_customer_managed_keys.managed_services.customer_managed_key_id - - depends_on = [module.my_mws_network, module.my_root_bucket] -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/README.md b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/README.md deleted file mode 100644 index b7d326da..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/README.md +++ /dev/null @@ -1,46 +0,0 @@ -## This module uses an existing VPC, inject 2 subnets into the VPC. - - -## Description - -This module `mws_network` is an abstract layer that wraps and returns `databricks_mws_networks`, which is a pre-requisite resource for the E2 workspace creation. - -## Providers - -| Name | Version | -| ---------------------------------------------------------------------- | ------- | -| [aws](#provider\_aws) | n/a | -| [databricks](#provider\_databricks) | n/a | - -## Modules - -No modules. 
- -## Resources - -| Name | Type | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| [aws_route_table.private_route_tables](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route_table) | resource | -| [aws_route_table_association.private_route_table_associations](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route_table_association) | resource | -| [aws_subnet.private_subnets](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/subnet) | resource | -| [databricks_mws_networks.mwsnetwork](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_networks) | resource | -| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -| ----------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | -------------- | ------- | :------: | -| [aws\_nat\_gateway\_id](#input\_aws\_nat\_gateway\_id) | n/a | `string` | n/a | yes | -| [databricks\_account\_id](#input\_databricks\_account\_id) | n/a | `string` | n/a | yes | -| [existing\_vpc\_id](#input\_existing\_vpc\_id) | provide existing vpc id for resources to deploy into | `string` | n/a | yes | -| [prefix](#input\_prefix) | n/a | `string` | n/a | yes | -| [private\_subnet\_pair](#input\_private\_subnet\_pair) | contains only 2 subnets cidr blocks | `list(string)` | n/a | yes | -| [region](#input\_region) | n/a | `string` | n/a | yes | -| [security\_group\_ids](#input\_security\_group\_ids) | n/a | `list(string)` | n/a | yes | - -## Outputs - -| Name | Description | -| 
-------------------------------------------------------------------- | ----------- | -| [network\_id](#output\_network\_id) | n/a | - \ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/main.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/main.tf deleted file mode 100644 index fc534cd2..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/main.tf +++ /dev/null @@ -1,44 +0,0 @@ -data "aws_availability_zones" "available" {} - -# Private subnets -resource "aws_subnet" "private_subnets" { - count = length(var.private_subnet_pair) - vpc_id = var.existing_vpc_id - cidr_block = var.private_subnet_pair[count.index] - availability_zone = data.aws_availability_zones.available.names[count.index] - map_public_ip_on_launch = false - - tags = var.tags -} - -# Private route table -resource "aws_route_table" "private_route_tables" { - count = length(var.private_subnet_pair) - vpc_id = var.existing_vpc_id - - route { - cidr_block = "0.0.0.0/0" - nat_gateway_id = var.aws_nat_gateway_id - } - tags = var.tags -} - -# Private route table association -resource "aws_route_table_association" "private_route_table_associations" { - count = length(var.private_subnet_pair) - subnet_id = aws_subnet.private_subnets[count.index].id - route_table_id = aws_route_table.private_route_tables[count.index].id -} - -resource "databricks_mws_networks" "mwsnetwork" { - account_id = var.databricks_account_id - network_name = "${var.prefix}-network" - vpc_id = var.existing_vpc_id - subnet_ids = [aws_subnet.private_subnets.0.id, aws_subnet.private_subnets.1.id] - security_group_ids = var.security_group_ids - - vpc_endpoints { - dataplane_relay = var.relay_vpce_id - rest_api = var.rest_vpce_id - } -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/providers.tf 
b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/providers.tf deleted file mode 100644 index f25d29af..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/providers.tf +++ /dev/null @@ -1,10 +0,0 @@ -terraform { - required_providers { - databricks = { - source = "databricks/databricks" - } - aws = { - source = "hashicorp/aws" - } - } -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/variables.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/variables.tf deleted file mode 100644 index 899ec83e..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_network/variables.tf +++ /dev/null @@ -1,41 +0,0 @@ -// provide existing vpc id for resources to deploy into -variable "existing_vpc_id" { - type = string -} - -variable "databricks_account_id" { - type = string -} - -variable "region" { - type = string -} - -variable "prefix" { - type = string -} - -variable "aws_nat_gateway_id" { - type = string -} - -//contains only 2 subnets cidr blocks -variable "private_subnet_pair" { - type = list(string) -} - -variable "security_group_ids" { - type = list(string) -} - -variable "relay_vpce_id" { - type = list(string) -} - -variable "rest_vpce_id" { - type = list(string) -} - -variable "tags" { - type = map(string) -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/README.md b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/README.md deleted file mode 100644 index 60b24dfa..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/README.md +++ /dev/null @@ -1,42 +0,0 @@ -## Module for AWS Databricks workspace storage configuration - - -## Description - -Returns a root bucket for workspace to use as dbfs. 
This module `mws_storage` is an abstract layer that wraps and returns `databricks_mws_storage_configurations`, which is a pre-requisite resource for the E2 workspace creation. - -## Providers - -| Name | Version | -| ---------------------------------------------------------------------- | ------- | -| [aws](#provider\_aws) | n/a | -| [databricks](#provider\_databricks) | n/a | - -## Modules - -No modules. - -## Resources - -| Name | Type | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------- | -| [aws_s3_bucket.root_storage_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource | -| [aws_s3_bucket_policy.root_bucket_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy) | resource | -| [aws_s3_bucket_public_access_block.root_storage_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_public_access_block) | resource | -| [databricks_mws_storage_configurations.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_storage_configurations) | resource | -| [databricks_aws_bucket_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/aws_bucket_policy) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -| ----------------------------------------------------------------------------------------------------- | ----------- | -------- | ------- | :------: | -| [databricks\_account\_id](#input\_databricks\_account\_id) | n/a | `string` | n/a | yes | -| [region](#input\_region) | n/a | `string` | n/a | yes | -| [root\_bucket\_name](#input\_root\_bucket\_name) | n/a | `string` | n/a | yes | - -## Outputs - -| Name | Description | -| 
---------------------------------------------------------------------------------------------------------------- | ----------- | -| [storage\_configuration\_id](#output\_storage\_configuration\_id) | n/a | - \ No newline at end of file diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/main.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/main.tf deleted file mode 100644 index 8f9903e9..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/main.tf +++ /dev/null @@ -1,30 +0,0 @@ -resource "aws_s3_bucket" "root_storage_bucket" { - bucket = var.root_bucket_name - acl = "private" - versioning { - enabled = false - } - force_destroy = true - tags = var.tags -} - -resource "aws_s3_bucket_public_access_block" "root_storage_bucket" { - bucket = aws_s3_bucket.root_storage_bucket.id - ignore_public_acls = true - depends_on = [aws_s3_bucket.root_storage_bucket] -} - -data "databricks_aws_bucket_policy" "this" { - bucket = aws_s3_bucket.root_storage_bucket.bucket -} - -resource "aws_s3_bucket_policy" "root_bucket_policy" { - bucket = aws_s3_bucket.root_storage_bucket.id - policy = data.databricks_aws_bucket_policy.this.json -} - -resource "databricks_mws_storage_configurations" "this" { - account_id = var.databricks_account_id - bucket_name = aws_s3_bucket.root_storage_bucket.bucket - storage_configuration_name = var.root_bucket_name -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/providers.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/providers.tf deleted file mode 100644 index f25d29af..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/providers.tf +++ /dev/null @@ -1,10 +0,0 @@ -terraform { - required_providers { - databricks = { - source = "databricks/databricks" - } - aws = { - source = 
"hashicorp/aws" - } - } -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/variables.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/variables.tf deleted file mode 100644 index b9c49132..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/modules/mws_storage/variables.tf +++ /dev/null @@ -1,15 +0,0 @@ -variable "databricks_account_id" { - type = string -} - -variable "root_bucket_name" { - type = string -} - -variable "region" { - type = string -} - -variable "tags" { - type = map(string) -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/outputs.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/outputs.tf deleted file mode 100644 index 67054c17..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/outputs.tf +++ /dev/null @@ -1,3 +0,0 @@ -output "workspace_url" { - value = databricks_mws_workspaces.this.workspace_url -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/providers.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/providers.tf deleted file mode 100644 index f25d29af..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/providers.tf +++ /dev/null @@ -1,10 +0,0 @@ -terraform { - required_providers { - databricks = { - source = "databricks/databricks" - } - aws = { - source = "hashicorp/aws" - } - } -} diff --git a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/variables.tf b/examples/aws-databricks-modular-privatelink/modules/mws_workspace/variables.tf deleted file mode 100644 index 775b905b..00000000 --- a/examples/aws-databricks-modular-privatelink/modules/mws_workspace/variables.tf +++ /dev/null @@ -1,58 +0,0 @@ -variable "databricks_account_id" { - type = string -} - -variable "credentials_id" { - type = string -} - -variable "prefix" { - type = string 
// should be a randomized string -} - -variable "region" { - type = string -} - -variable "workspace_name" { - type = string -} - -// for network config -variable "existing_vpc_id" { - type = string -} - -variable "nat_gateways_id" { - type = string -} - -variable "security_group_ids" { - type = list(string) -} - -variable "private_subnet_pair" { - type = list(string) -} -// for cmk config -variable "managed_services_cmk" { -} - -variable "workspace_storage_cmk" { -} - -variable "root_bucket_name" { - type = string -} - -variable "relay_vpce_id" { - type = list(string) -} - -variable "rest_vpce_id" { - type = list(string) -} - -variable "tags" { - type = map(string) -} diff --git a/examples/aws-databricks-modular-privatelink/outputs.tf b/examples/aws-databricks-modular-privatelink/outputs.tf index 963286a1..53e9a757 100644 --- a/examples/aws-databricks-modular-privatelink/outputs.tf +++ b/examples/aws-databricks-modular-privatelink/outputs.tf @@ -1,34 +1,9 @@ -output "databricks_hosts" { - value = tomap({ - for k, ws in module.workspace_collection : k => ws.workspace_url - }) -} - -output "arn" { - value = aws_iam_role.cross_account_role.arn -} - -/* -// export token for integration tests to run on -output "databricks_token" { - value = databricks_token.pat.token_value - sensitive = true -} -*/ -output "role_for_s3_access_id" { - value = aws_iam_role.role_for_s3_access.id -} - -output "role_for_s3_access_name" { - value = aws_iam_role.role_for_s3_access.name -} - -output "instance_profile_arn" { - value = aws_iam_instance_profile.instance_profile.arn -} -/* -output "databricks_instance_profile_id" { - value = databricks_instance_profile.instance_profile.id +output "workspaces_urls" { + value = { + "env1" = [for workspace in module.multiple_workspaces["env1"].databricks_host : workspace] + /* + "env2" = [for workspace in module.multiple_workspaces["env2"].databricks_host : workspace] + */ + } } -*/ diff --git 
a/examples/aws-databricks-modular-privatelink/privatelink.tf b/examples/aws-databricks-modular-privatelink/privatelink.tf deleted file mode 100644 index 6348f3bc..00000000 --- a/examples/aws-databricks-modular-privatelink/privatelink.tf +++ /dev/null @@ -1,125 +0,0 @@ -// VPC Endpoints -/* -module "vpc_endpoints" { - source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints" - version = "3.11.0" - - vpc_id = aws_vpc.mainvpc.id - security_group_ids = [aws_security_group.sg.id] - - endpoints = { - s3 = { - count = length(local.private_subnets_cidr) - service = "s3" - service_type = "Gateway" - route_table_ids = flatten([ - aws_route_table.private_rt[*].id - ]) - tags = { - Name = "${local.prefix}-s3-vpc-endpoint" - } - }, - sts = { - service = "sts" - private_dns_enabled = true - subnet_ids = aws_subnet.private[*].id - tags = { - Name = "${local.prefix}-sts-vpc-endpoint" - } - }, - kinesis-streams = { - service = "kinesis-streams" - private_dns_enabled = true - subnet_ids = aws_subnet.private[*].id - tags = { - Name = "${local.prefix}-kinesis-vpc-endpoint" - } - } - } -} -*/ - -resource "aws_security_group" "privatelink" { - vpc_id = aws_vpc.mainvpc.id - - ingress { - description = "Inbound rules" - from_port = 443 - to_port = 443 - protocol = "tcp" - security_groups = [aws_security_group.sg.id] - } - - ingress { - description = "Inbound rules" - from_port = 6666 - to_port = 6666 - protocol = "tcp" - security_groups = [aws_security_group.sg.id] - } - - egress { - description = "Outbound rules" - from_port = 443 - to_port = 443 - protocol = "tcp" - security_groups = [aws_security_group.sg.id] - } - - egress { - description = "Outbound rules" - from_port = 6666 - to_port = 6666 - protocol = "tcp" - security_groups = [aws_security_group.sg.id] - } - - tags = { - Name = "${local.prefix}-privatelink-sg" - } -} - -resource "aws_vpc_endpoint" "backend_rest" { - vpc_id = aws_vpc.mainvpc.id - service_name = var.workspace_vpce_service - vpc_endpoint_type = "Interface" - 
security_group_ids = [aws_security_group.privatelink.id] - subnet_ids = aws_subnet.privatelink[*].id - private_dns_enabled = true // try to directly set this to true in the first apply - depends_on = [aws_subnet.privatelink] - tags = { - Name = "${local.prefix}-databricks-backend-rest" - } -} - -resource "aws_vpc_endpoint" "backend_relay" { - vpc_id = aws_vpc.mainvpc.id - service_name = var.relay_vpce_service - vpc_endpoint_type = "Interface" - security_group_ids = [aws_security_group.privatelink.id] - subnet_ids = aws_subnet.privatelink[*].id - private_dns_enabled = true - depends_on = [aws_subnet.privatelink] - tags = { - Name = "${local.prefix}-databricks-backend-relay" - } -} - -// from official guide -resource "databricks_mws_vpc_endpoint" "backend_rest_vpce" { - provider = databricks.mws - account_id = var.databricks_account_id - aws_vpc_endpoint_id = aws_vpc_endpoint.backend_rest.id - vpc_endpoint_name = "${local.prefix}-vpc-backend-${aws_vpc.mainvpc.id}" - region = var.region - depends_on = [aws_vpc_endpoint.backend_rest] -} - -resource "databricks_mws_vpc_endpoint" "relay" { - provider = databricks.mws - account_id = var.databricks_account_id - aws_vpc_endpoint_id = aws_vpc_endpoint.backend_relay.id - vpc_endpoint_name = "${local.prefix}-vpc-relay-${aws_vpc.mainvpc.id}" - region = var.region - depends_on = [aws_vpc_endpoint.backend_relay] -} diff --git a/examples/aws-databricks-modular-privatelink/providers.tf b/examples/aws-databricks-modular-privatelink/providers.tf index 85384f0f..bcf226e9 100644 --- a/examples/aws-databricks-modular-privatelink/providers.tf +++ b/examples/aws-databricks-modular-privatelink/providers.tf @@ -1,20 +1,16 @@ terraform { - /* - backend "s3" { - bucket = "tf-backend-bucket-haowang" # Replace this with your bucket name! - key = "global/s3-databricks-project/terraform.tfstate" - region = "ap-southeast-1" - dynamodb_table = "tf-backend-dynamodb-databricks-project" # Replace this with your DynamoDB table name! 
- encrypt = true - } - */ required_providers { - databricks = { - source = "databricks/databricks" - } aws = { source = "hashicorp/aws" - version = "~> 4.0" + version = "5.93.0" + } + databricks = { + source = "databricks/databricks" + version = "1.70.0" + } + time = { + source = "hashicorp/time" + version = "0.9.1" } } } @@ -23,12 +19,13 @@ provider "aws" { region = var.region } -// initialize provider in "MWS" mode to provision new workspace provider "databricks" { alias = "mws" host = "https://accounts.cloud.databricks.com" account_id = var.databricks_account_id - client_id = var.databricks_account_client_id - client_secret = var.databricks_account_client_secret - auth_type = "oauth-m2m" + client_id = var.client_id + client_secret = var.client_secret +} + +provider "time" { } diff --git a/examples/aws-databricks-modular-privatelink/variables.tf b/examples/aws-databricks-modular-privatelink/variables.tf index 10cf2d13..b9fe40cb 100644 --- a/examples/aws-databricks-modular-privatelink/variables.tf +++ b/examples/aws-databricks-modular-privatelink/variables.tf @@ -1,87 +1,20 @@ -variable "databricks_account_client_id" { - type = string - description = "Application ID of account-level service principal" -} - -variable "databricks_account_client_secret" { - type = string - description = "Client secret of account-level service principal" -} - variable "databricks_account_id" { - type = string - description = "Databricks Account ID" -} - -variable "region" { - type = string - description = "AWS region to deploy to" - default = "ap-southeast-1" -} - -#cmk -variable "cmk_admin" { - type = string - default = "arn:aws:iam::026655378770:user/hao" + type = string } -variable "tags" { - default = {} - type = map(string) - description = "Optional tags to add to created resources" +variable "aws_account_id" { + type = string } -variable "vpc_cidr" { - default = "10.109.0.0/17" +variable "client_id" { + type = string } -variable "public_subnets_cidr" { - type = list(string) - 
default = ["10.109.2.0/23"] +variable "client_secret" { + type = string } -variable "privatelink_subnets_cidr" { - type = list(string) - default = ["10.109.4.0/23"] -} - -variable "workspace_vpce_service" { - type = string - default = "com.amazonaws.vpce.ap-southeast-1.vpce-svc-02535b257fc253ff4" // for workspace vpce, ap-southeast-1 only -} - -variable "relay_vpce_service" { +variable "region" { type = string - default = "com.amazonaws.vpce.ap-southeast-1.vpce-svc-0557367c6fc1a0c5c" // for relay vpce, ap-southeast-1 only -} - -variable "workspace_1_config" { - default = { - private_subnet_pair = { subnet1_cidr = "10.109.6.0/23", subnet2_cidr = "10.109.8.0/23" } - workspace_name = "test-workspace-1" - prefix = "ws1" // prefix decides subnets name - region = "ap-southeast-1" - root_bucket_name = "test-workspace-1-rootbucket" - block_list = ["58.133.93.159"] - allow_list = ["65.184.145.97"] // if allow_list empty, all public IP not blocked by block_list are allowed - tags = { - "Name" = "test-workspace-1-tags", - "Env" = "test-ws-1" - } - } -} - -variable "workspace_2_config" { - default = { - private_subnet_pair = { subnet1_cidr = "10.109.10.0/23", subnet2_cidr = "10.109.12.0/23" } - workspace_name = "test-workspace-2" - prefix = "ws2" // prefix decides subnets name - region = "ap-southeast-1" - root_bucket_name = "test-workspace-2-rootbucket" - block_list = ["54.112.179.135", "195.78.164.130"] - allow_list = ["65.184.145.97"] // if allow_list empty, all public IP not blocked by block_list are allowed - tags = { - "Name" = "test-workspace-2-tags" - } - } + default = "ap-southeast-1" } diff --git a/examples/aws-databricks-modular-privatelink/vpc.tf b/examples/aws-databricks-modular-privatelink/vpc.tf deleted file mode 100644 index a5f8752a..00000000 --- a/examples/aws-databricks-modular-privatelink/vpc.tf +++ /dev/null @@ -1,139 +0,0 @@ -data "aws_availability_zones" "available" {} - -resource "aws_vpc" "mainvpc" { - cidr_block = var.vpc_cidr - enable_dns_hostnames = 
true - enable_dns_support = true - - tags = merge(var.tags, { - Name = "${local.prefix}-vpc" - }) -} - -# Public subnets collection, default 1 -resource "aws_subnet" "public_subnets" { - count = length(var.public_subnets_cidr) - vpc_id = aws_vpc.mainvpc.id - cidr_block = var.public_subnets_cidr[count.index] - availability_zone = data.aws_availability_zones.available.names[count.index] - map_public_ip_on_launch = true - - tags = merge(var.tags, { - Name = "${local.prefix}-${aws_vpc.mainvpc.id}-public-subnet" - }) -} - - -# Private subnets collection for Private Link (VPC endpoints), default 1 -resource "aws_subnet" "privatelink" { - count = length(var.privatelink_subnets_cidr) - vpc_id = aws_vpc.mainvpc.id - cidr_block = var.privatelink_subnets_cidr[count.index] - availability_zone = data.aws_availability_zones.available.names[count.index] - map_public_ip_on_launch = false // explicit private subnet - - tags = merge(var.tags, { - Name = "${local.prefix}-${aws_vpc.mainvpc.id}-pl-vpce-subnet" - }) -} - - -resource "aws_route_table" "pl_subnet_rt" { - vpc_id = aws_vpc.mainvpc.id - - tags = merge(var.tags, { - Name = "${local.prefix}-pl-local-route-tbl" - }) -} - -resource "aws_route_table_association" "dataplane_vpce_rtb" { - count = length(var.privatelink_subnets_cidr) - subnet_id = aws_subnet.privatelink[count.index].id - route_table_id = aws_route_table.pl_subnet_rt.id -} - - -# Nat gateway EIP -resource "aws_eip" "nat_gateway_elastic_ips" { - count = length(var.public_subnets_cidr) - vpc = true -} - -# Nat gateway -resource "aws_nat_gateway" "nat_gateways" { - count = length(var.public_subnets_cidr) - allocation_id = aws_eip.nat_gateway_elastic_ips[count.index].id - subnet_id = aws_subnet.public_subnets[count.index].id - - tags = merge(var.tags, { - Name = "${local.prefix}-${aws_vpc.mainvpc.id}-nat-gateway" - }) -} - -// Internet Gateway -resource "aws_internet_gateway" "igw" { - vpc_id = aws_vpc.mainvpc.id -} - -# Public route table -resource "aws_route_table" 
"public_route_table" { - vpc_id = aws_vpc.mainvpc.id - - route { - cidr_block = "0.0.0.0/0" - gateway_id = aws_internet_gateway.igw.id - } - - tags = merge(var.tags, { - Name = "${local.prefix}-public-rt" - }) -} - - -# Public route table association -resource "aws_route_table_association" "public_route_table_associations" { - count = length(var.public_subnets_cidr) - subnet_id = aws_subnet.public_subnets[count.index].id - route_table_id = aws_route_table.public_route_table.id -} - -// Security Group -resource "aws_security_group" "sg" { - vpc_id = aws_vpc.mainvpc.id - depends_on = [aws_vpc.mainvpc] - name = "databricks-vpc-security-group-${local.prefix}" - description = "databricks vpc security group for ${local.prefix}" - - dynamic "ingress" { - for_each = local.sg_ingress_protocol - content { - from_port = 0 - to_port = 65535 - protocol = ingress.value - self = true - } - } - - dynamic "egress" { - for_each = local.sg_egress_protocol - content { - from_port = 0 - to_port = 65535 - protocol = egress.value - self = true - } - } - - dynamic "egress" { - for_each = local.sg_egress_ports - content { - from_port = egress.value - to_port = egress.value - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } - } - tags = { - Name = "${local.prefix}-dataplane-sg" - } -}