From 9e0193695262646a04dabb04a866712a070688d4 Mon Sep 17 00:00:00 2001
From: Adler Santos
Date: Wed, 2 Jun 2021 01:52:35 -0400
Subject: [PATCH] feat: Support Terraform remote state when generating GCP resources (#39)

---
 README.md                                    |   4 +
 .../_terraform/ml_datasets_dataset.tf        |   2 +-
 .../_terraform/penguins_pipeline.tf          |   2 +-
 datasets/ml_datasets/_terraform/provider.tf  |   7 +-
 datasets/ml_datasets/_terraform/variables.tf |   2 +-
 scripts/generate_terraform.py                |  68 ++++++--
 templates/terraform/backend.tf.jinja2        |  23 +++
 tests/scripts/test_generate_terraform.py     | 158 +++++++++++++++++-
 8 files changed, 248 insertions(+), 18 deletions(-)
 create mode 100644 templates/terraform/backend.tf.jinja2

diff --git a/README.md b/README.md
index 83f6bd8b1..1a9d4f4db 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,8 @@ $ python scripts/generate_terraform.py \
     --region REGION \
     --bucket-name-prefix UNIQUE_BUCKET_PREFIX \
     [--env] dev \
+    [--tf-state-bucket] \
+    [--tf-state-prefix] \
     [--tf-apply] \
     [--impersonating-acct] IMPERSONATING_SERVICE_ACCT
 ```
@@ -90,6 +92,8 @@
 This generates Terraform files (`*.tf`) in a `_terraform` directory inside that dataset folder.
 
 The `--bucket-name-prefix` is used to ensure that the buckets created by different environments and contributors are kept unique. This is to satisfy the rule where bucket names must be globally unique across all of GCS. Use hyphenated names (`some-prefix-123`) instead of snakecase or underscores (`some_prefix_123`).
 
+The `--tf-state-bucket` and `--tf-state-prefix` parameters can optionally be used to store the Terraform state in a remote GCS backend. When `--tf-state-bucket` is supplied, a `backend.tf` file is generated that points to the GCS bucket and prefix where the Terraform state will be stored (`--tf-state-prefix` defaults to `terraform/state`). For more info, see the [Terraform docs for using GCS backends](https://www.terraform.io/docs/language/settings/backends/gcs.html).
+
 In addition, the command above creates a "dot" directory in the project root. The directory name is the value you pass to the `--env` parameter of the command. If no `--env` argument was passed, the value defaults to `dev` (which generates the `.dev` folder). Consider this "dot" directory as your own dedicated space for prototyping. The files and variables created in that directory will use an isolated environment. All such directories are gitignored.
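
The two flags above map one-to-one onto new positional parameters of `generate_terraform.main()` (changed later in this patch). A minimal sketch of the equivalent programmatic call — all argument values are hypothetical, and the import path is an assumption:

```python
# Sketch only: hypothetical values throughout; the import path is assumed,
# not shown in this patch. Positional order follows main()'s new signature
# in scripts/generate_terraform.py below.
import generate_terraform  # assumed import path

generate_terraform.main(
    "ml_datasets",            # dataset_id
    "my-gcp-project",         # project_id (hypothetical)
    "my-bucket-prefix",       # bucket_name_prefix (hyphenated, per the README)
    "us-central1",            # region (hypothetical)
    "svc@my-gcp-project.iam.gserviceaccount.com",  # impersonating_acct (hypothetical)
    "dev",                    # env -> generated files land in the .dev/ tree
    "my-tf-state-bucket",     # tf_state_bucket; pass None to skip backend.tf
    "terraform/state",        # tf_state_prefix (the argparse default)
    tf_apply=False,
)
```
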
diff --git a/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf b/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf
index cbc8728a4..c565a3f25 100644
--- a/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf
+++ b/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf
@@ -18,7 +18,7 @@
 resource "google_bigquery_dataset" "ml_datasets" {
   dataset_id = "ml_datasets"
   project    = var.project_id
-  }
+}
 
 output "bigquery_dataset-ml_datasets-dataset_id" {
   value = google_bigquery_dataset.ml_datasets.dataset_id
diff --git a/datasets/ml_datasets/_terraform/penguins_pipeline.tf b/datasets/ml_datasets/_terraform/penguins_pipeline.tf
index a61228ad6..88c5a1b19 100644
--- a/datasets/ml_datasets/_terraform/penguins_pipeline.tf
+++ b/datasets/ml_datasets/_terraform/penguins_pipeline.tf
@@ -20,7 +20,7 @@
 resource "google_bigquery_table" "penguins" {
   dataset_id = "ml_datasets"
   table_id   = "penguins"
 
-  
+
   depends_on = [
     google_bigquery_dataset.ml_datasets
diff --git a/datasets/ml_datasets/_terraform/provider.tf b/datasets/ml_datasets/_terraform/provider.tf
index fc45f218d..23ab87dcd 100644
--- a/datasets/ml_datasets/_terraform/provider.tf
+++ b/datasets/ml_datasets/_terraform/provider.tf
@@ -16,9 +16,10 @@
 
 
 provider "google" {
-  project = var.project_id
-  region  = var.region
-  }
+  project                     = var.project_id
+  impersonate_service_account = var.impersonating_acct
+  region                      = var.region
+}
 
 data "google_client_openid_userinfo" "me" {}
diff --git a/datasets/ml_datasets/_terraform/variables.tf b/datasets/ml_datasets/_terraform/variables.tf
index 9fd0b3853..c3ec7c506 100644
--- a/datasets/ml_datasets/_terraform/variables.tf
+++ b/datasets/ml_datasets/_terraform/variables.tf
@@ -16,7 +16,7 @@
 
 
 variable "project_id" {}
-variable "project_num" {}
+variable "bucket_name_prefix" {}
 variable "impersonating_acct" {}
 variable "region" {}
 variable "env" {}
diff --git a/scripts/generate_terraform.py b/scripts/generate_terraform.py
index 5e72453fd..4e6997e48 100644
--- a/scripts/generate_terraform.py
+++ b/scripts/generate_terraform.py
@@ -35,6 +35,7 @@
     "tfvars": TEMPLATES_PATH / "terraform.tfvars.jinja2",
     "variables": TEMPLATES_PATH / "variables.tf.jinja2",
     "provider": TEMPLATES_PATH / "provider.tf.jinja2",
+    "backend": TEMPLATES_PATH / "backend.tf.jinja2",
 }
 
 yaml = yaml.YAML(typ="safe")
@@ -47,6 +48,8 @@ def main(
     region: str,
     impersonating_acct: str,
     env: str,
+    tf_state_bucket: str,
+    tf_state_prefix: str,
     tf_apply: bool = False,
 ):
     validate_bucket_name(bucket_name_prefix)
@@ -55,6 +58,7 @@
     create_gitignored_env_path(dataset_id, env_path)
 
     generate_provider_tf(project_id, dataset_id, region, impersonating_acct, env_path)
+    generate_backend_tf(dataset_id, tf_state_bucket, tf_state_prefix, env_path)
 
     dataset_config = yaml.load(open(DATASETS_PATH / dataset_id / "dataset.yaml"))
     generate_dataset_tf(dataset_id, project_id, dataset_config, env)
@@ -86,7 +90,26 @@
         },
     )
 
-    create_file_in_dot_and_project_dirs(dataset_id, contents, "provider.tf", env_path)
+    create_file_in_dir_tree(dataset_id, contents, "provider.tf", env_path)
+
+
+def generate_backend_tf(
+    dataset_id: str, tf_state_bucket: str, tf_state_prefix: str, env_path: pathlib.Path
+):
+    if not tf_state_bucket:
+        return
+
+    contents = apply_substitutions_to_template(
+        TEMPLATE_PATHS["backend"],
+        {
+            "tf_state_bucket": tf_state_bucket,
+            "tf_state_prefix": tf_state_prefix,
+        },
+    )
+
+    create_file_in_dir_tree(
+        dataset_id, contents, "backend.tf", env_path, use_project_dir=False
+    )
 
 
 def generate_dataset_tf(dataset_id: str, project_id: str, config: dict, env: str):
@@ -99,7 +122,7 @@ def generate_dataset_tf(dataset_id: str, project_id: str, config: dict, env: str
     for resource in config["resources"]:
         contents += tf_resource_contents(resource, copy.deepcopy(subs))
 
-    create_file_in_dot_and_project_dirs(
+    create_file_in_dir_tree(
         dataset_id, contents, f"{dataset_id}_dataset.tf", PROJECT_ROOT / f".{env}"
     )
@@ -130,14 +153,14 @@ def generate_pipeline_tf(
     for resource in config["resources"]:
         contents += tf_resource_contents(resource, {**subs, **resource})
 
-    create_file_in_dot_and_project_dirs(
+    create_file_in_dir_tree(
         dataset_id, contents, f"{pipeline_id}_pipeline.tf", env_path
     )
 
 
 def generate_variables_tf(dataset_id: str, env_path: pathlib.Path):
     contents = pathlib.Path(TEMPLATE_PATHS["variables"]).read_text()
-    create_file_in_dot_and_project_dirs(dataset_id, contents, "variables.tf", env_path)
+    create_file_in_dir_tree(dataset_id, contents, "variables.tf", env_path)
 
 
 def generate_tfvars_file(
@@ -219,14 +242,24 @@ def create_gitignored_env_path(dataset_id: str, env_path: pathlib.Path):
     (env_path / "datasets" / dataset_id).mkdir(parents=True, exist_ok=True)
 
 
-def create_file_in_dot_and_project_dirs(
-    dataset_id: str, contents: str, filename: str, env_path: pathlib.Path
+def create_file_in_dir_tree(
+    dataset_id: str,
+    contents: str,
+    filename: str,
+    env_path: pathlib.Path,
+    use_env_dir: bool = True,
+    use_project_dir: bool = True,
 ):
     filepaths = []
-    for prefix in (
-        env_path / "datasets" / dataset_id / "_terraform",
-        DATASETS_PATH / dataset_id / "_terraform",
-    ):
+    prefixes = []
+
+    if use_env_dir:
+        prefixes.append(env_path / "datasets" / dataset_id / "_terraform")
+
+    if use_project_dir:
+        prefixes.append(DATASETS_PATH / dataset_id / "_terraform")
+
+    for prefix in prefixes:
         if not prefix.exists():
             prefix.mkdir(parents=True, exist_ok=True)
@@ -303,6 +336,19 @@ def apply_substitutions_to_template(template: pathlib.Path, subs: dict) -> str:
         dest="bucket_name_prefix",
         help="The prefix to use for GCS bucket names for global uniqueness",
     )
+    parser.add_argument(
+        "--tf-state-bucket",
+        type=str,
+        dest="tf_state_bucket",
+        help="The GCS bucket name for the Terraform remote state",
+    )
+    parser.add_argument(
+        "--tf-state-prefix",
+        type=str,
+        default="terraform/state",
+        dest="tf_state_prefix",
+        help="The GCS bucket prefix for the Terraform remote state",
+    )
     parser.add_argument(
         "-e",
         "--env",
@@ -335,5 +381,7 @@
         args.region,
         args.impersonating_acct,
         args.env,
+        args.tf_state_bucket,
+        args.tf_state_prefix,
         args.tf_apply,
     )
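
The new `generate_backend_tf` above renders the `backend` template only when `tf_state_bucket` is set. The body of `apply_substitutions_to_template` is not part of this patch; assuming it is a plain Jinja2 render (as the `.jinja2` suffix suggests), the substitution step amounts to roughly:

```python
# Sketch under an assumption: apply_substitutions_to_template is treated here
# as a plain Jinja2 render, which this patch does not show. Values are
# hypothetical; the template itself is the new file in the next diff.
import pathlib
from jinja2 import Template

template_text = pathlib.Path("templates/terraform/backend.tf.jinja2").read_text()
contents = Template(template_text).render(
    tf_state_bucket="my-tf-state-bucket",  # hypothetical bucket
    tf_state_prefix="terraform/state",     # the new argparse default
)
print(contents)  # emits the terraform { backend "gcs" { ... } } block below
```
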
diff --git a/templates/terraform/backend.tf.jinja2 b/templates/terraform/backend.tf.jinja2
new file mode 100644
index 000000000..e0a26b879
--- /dev/null
+++ b/templates/terraform/backend.tf.jinja2
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+terraform {
+  backend "gcs" {
+    bucket = "{{ tf_state_bucket }}"
+    prefix = "{{ tf_state_prefix }}"
+  }
+}
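
Note that `generate_backend_tf` writes this rendered file with `use_project_dir=False`, so `backend.tf` lands only in the gitignored env tree and is never committed under `datasets/` — exactly what the tests below assert. A condensed, illustrative restatement of the directory selection in `create_file_in_dir_tree`:

```python
# Illustrative restatement of create_file_in_dir_tree's directory selection
# (see scripts/generate_terraform.py above); paths here are examples only.
import pathlib

def target_dirs(
    dataset_id: str,
    env_path: pathlib.Path,
    datasets_path: pathlib.Path,
    use_env_dir: bool = True,
    use_project_dir: bool = True,
) -> list:
    prefixes = []
    if use_env_dir:
        prefixes.append(env_path / "datasets" / dataset_id / "_terraform")
    if use_project_dir:
        prefixes.append(datasets_path / dataset_id / "_terraform")
    return prefixes

# backend.tf is generated with use_project_dir=False, so only the gitignored
# env directory (e.g. .dev/) receives it:
print(target_dirs("ml_datasets", pathlib.Path(".dev"), pathlib.Path("datasets"),
                  use_project_dir=False))
# -> [PosixPath('.dev/datasets/ml_datasets/_terraform')]
```
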
diff --git a/tests/scripts/test_generate_terraform.py b/tests/scripts/test_generate_terraform.py
index 38d94979e..699e2610a 100644
--- a/tests/scripts/test_generate_terraform.py
+++ b/tests/scripts/test_generate_terraform.py
@@ -89,6 +89,16 @@ def bq_table_resource() -> dict:
     }
 
 
+@pytest.fixture
+def tf_state_bucket() -> str:
+    return "test-terraform-state-bucket"
+
+
+@pytest.fixture
+def tf_state_prefix() -> str:
+    return "test/terraform/state"
+
+
 @pytest.fixture
 def env() -> str:
     return "test"
@@ -107,6 +117,8 @@ def test_main_generates_tf_files(
     region,
     impersonating_acct,
     env,
+    tf_state_bucket,
+    tf_state_prefix,
 ):
     shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
     shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")
@@ -118,6 +130,8 @@
         region,
         impersonating_acct,
         env,
+        tf_state_bucket,
+        tf_state_prefix,
     )
 
     for path_prefix in (
@@ -129,6 +143,69 @@
         assert (path_prefix / f"{pipeline_path.name}_pipeline.tf").exists()
         assert (path_prefix / "variables.tf").exists()
 
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "terraform.tfvars"
+    ).exists()
+
+    assert (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars"
+    ).exists()
+
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "backend.tf"
+    ).exists()
+
+    assert (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf"
+    ).exists()
+
+
+def test_main_without_tf_remote_state_generates_tf_files_except_backend_tf(
+    dataset_path,
+    pipeline_path,
+    project_id,
+    bucket_name_prefix,
+    region,
+    impersonating_acct,
+    env,
+):
+    shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
+    shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")
+
+    generate_terraform.main(
+        dataset_path.name,
+        project_id,
+        bucket_name_prefix,
+        region,
+        impersonating_acct,
+        env,
+        None,
+        None,
+    )
+
+    for path_prefix in (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform",
+        generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform",
+    ):
+        assert (path_prefix / "provider.tf").exists()
+        assert (path_prefix / f"{dataset_path.name}_dataset.tf").exists()
+        assert (path_prefix / f"{pipeline_path.name}_pipeline.tf").exists()
+        assert (path_prefix / "variables.tf").exists()
+        assert not (path_prefix / "backend.tf").exists()
+
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "terraform.tfvars"
+    ).exists()
+
     assert (
         ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars"
     ).exists()
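
Both tests above share the `generates_tf_files` fragment in their names, so they can be run in isolation with a keyword filter — a hypothetical invocation:

```python
# Hypothetical selection: "-k generates_tf_files" matches both tests above.
import pytest

pytest.main(
    ["tests/scripts/test_generate_terraform.py", "-k", "generates_tf_files", "-v"]
)
```
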
@@ -146,6 +223,8 @@
     region,
     impersonating_acct,
     env,
+    tf_state_bucket,
+    tf_state_prefix,
 ):
     assert pipeline_path.name != pipeline_path_2.name
 
@@ -160,6 +239,8 @@
         region,
         impersonating_acct,
         env,
+        tf_state_bucket,
+        tf_state_prefix,
     )
 
     for path_prefix in (
@@ -172,13 +253,38 @@
         assert (path_prefix / f"{pipeline_path_2.name}_pipeline.tf").exists()
         assert (path_prefix / "variables.tf").exists()
 
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "terraform.tfvars"
+    ).exists()
+
     assert (
         ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars"
     ).exists()
 
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "backend.tf"
+    ).exists()
+
+    assert (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf"
+    ).exists()
+
 
 def test_dataset_without_any_pipelines(
-    dataset_path, project_id, bucket_name_prefix, region, impersonating_acct, env
+    dataset_path,
+    project_id,
+    bucket_name_prefix,
+    region,
+    impersonating_acct,
+    env,
+    tf_state_bucket,
+    tf_state_prefix,
 ):
     shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
@@ -189,6 +295,8 @@
         region,
         impersonating_acct,
         env,
+        tf_state_bucket,
+        tf_state_prefix,
     )
 
     for path_prefix in (
@@ -198,9 +306,37 @@
     ):
         assert (path_prefix / "provider.tf").exists()
         assert (path_prefix / f"{dataset_path.name}_dataset.tf").exists()
 
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "terraform.tfvars"
+    ).exists()
+
+    assert (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars"
+    ).exists()
+
+    assert not (
+        generate_terraform.DATASETS_PATH
+        / dataset_path.name
+        / "_terraform"
+        / "backend.tf"
+    ).exists()
+
+    assert (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf"
+    ).exists()
+
 
 def test_dataset_path_does_not_exist(
-    project_id, bucket_name_prefix, region, impersonating_acct, env
+    project_id,
+    bucket_name_prefix,
+    region,
+    impersonating_acct,
+    env,
+    tf_state_bucket,
+    tf_state_prefix,
 ):
     with pytest.raises(FileNotFoundError):
         generate_terraform.main(
@@ -210,6 +346,8 @@
             region,
             impersonating_acct,
             env,
+            tf_state_bucket,
+            tf_state_prefix,
         )
 
 
@@ -221,6 +359,8 @@
     region,
     impersonating_acct,
     env,
+    tf_state_bucket,
+    tf_state_prefix,
 ):
     shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
     shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")
@@ -232,6 +372,8 @@
         region,
         impersonating_acct,
         env,
+        tf_state_bucket,
+        tf_state_prefix,
     )
 
     license_header = pathlib.Path(
@@ -255,6 +397,10 @@
         ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars"
     ).read_text().count(license_header) == 1
 
+    assert (
+        ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf"
+    ).read_text().count(license_header) == 1
+
 
 def test_bucket_names_must_not_contain_dots_and_google():
     for name in (
@@ -283,6 +429,8 @@ def test_bucket_prefixes_must_use_hyphens_instead_of_underscores(
     region,
     impersonating_acct,
     env,
+    tf_state_bucket,
+    tf_state_prefix,
 ):
     for prefix in (
         "test_prefix",
@@ -296,6 +444,8 @@
             region,
             impersonating_acct,
             env,
+            tf_state_bucket,
+            tf_state_prefix,
         )
 
 
@@ -318,6 +468,8 @@ def test_validation_on_generated_tf_files_in_dot_env_dir(
         region,
         impersonating_acct,
         env,
+        None,
+        None,
     )
 
     env_dataset_path = ENV_DATASETS_PATH / dataset_path.name
@@ -346,6 +498,8 @@ def test_validation_on_generated_tf_files_in_project_dir(
         region,
         impersonating_acct,
         env,
+        None,
+        None,
    )
 
     project_dataset_path = generate_terraform.DATASETS_PATH / dataset_path.name
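
The patch excerpt ends mid-hunk here; the remaining bodies of the two validation tests are not shown. Purely as a hedged sketch — an assumption about what "validation" could mean, not the tests' actual contents — validating a generated `_terraform` directory with the Terraform CLI might look like:

```python
# Assumption: validation means running the real Terraform CLI against the
# generated directory. `-backend=false` keeps `terraform init` from touching
# the GCS backend declared in backend.tf.
import subprocess

def terraform_validate(tf_dir: str) -> None:
    subprocess.check_call(["terraform", "init", "-backend=false"], cwd=tf_dir)
    subprocess.check_call(["terraform", "validate"], cwd=tf_dir)

terraform_validate(".test/datasets/ml_datasets/_terraform")  # hypothetical path
```
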