From 1546e8273ca8670afb0a99914a21d7fddcfc2d92 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Tue, 25 May 2021 23:55:39 -0400 Subject: [PATCH 1/8] jinja template for backend.tf --- templates/terraform/backend.tf.jinja2 | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 templates/terraform/backend.tf.jinja2 diff --git a/templates/terraform/backend.tf.jinja2 b/templates/terraform/backend.tf.jinja2 new file mode 100644 index 000000000..ec3a2ecbf --- /dev/null +++ b/templates/terraform/backend.tf.jinja2 @@ -0,0 +1,23 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +terraform { + backend "gcs" { + bucket = {{ tf_state_bucket }} + prefix = {{ tf_state_prefix }} + } +} From c316d30f4a45fe2c4adc567b5d8723ab9255c664 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Tue, 25 May 2021 23:56:51 -0400 Subject: [PATCH 2/8] template path for backend.tf --- scripts/generate_terraform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/generate_terraform.py b/scripts/generate_terraform.py index 5e72453fd..a44572186 100644 --- a/scripts/generate_terraform.py +++ b/scripts/generate_terraform.py @@ -35,6 +35,7 @@ "tfvars": TEMPLATES_PATH / "terraform.tfvars.jinja2", "variables": TEMPLATES_PATH / "variables.tf.jinja2", "provider": TEMPLATES_PATH / "provider.tf.jinja2", + "backend": TEMPLATES_PATH / "backend.tf.jinja2", } yaml = yaml.YAML(typ="safe") From cac78fecace806b8ca4051134939e73bce1a1102 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Tue, 25 May 2021 23:58:59 -0400 Subject: [PATCH 3/8] add backend.tf file generation in generate_terraform.py script --- scripts/generate_terraform.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/scripts/generate_terraform.py b/scripts/generate_terraform.py index a44572186..3f47b7f65 100644 --- a/scripts/generate_terraform.py +++ b/scripts/generate_terraform.py @@ -48,6 +48,8 @@ def main( region: str, impersonating_acct: str, env: str, + tf_state_bucket: str, + tf_state_prefix: str, tf_apply: bool = False, ): validate_bucket_name(bucket_name_prefix) @@ -56,6 +58,7 @@ def main( create_gitignored_env_path(dataset_id, env_path) generate_provider_tf(project_id, dataset_id, region, impersonating_acct, env_path) + generate_backend_tf(dataset_id, tf_state_bucket, tf_state_prefix, env_path) dataset_config = yaml.load(open(DATASETS_PATH / dataset_id / "dataset.yaml")) generate_dataset_tf(dataset_id, project_id, dataset_config, env) @@ -90,6 +93,23 @@ def generate_provider_tf( create_file_in_dot_and_project_dirs(dataset_id, contents, "provider.tf", env_path) +def generate_backend_tf( + dataset_id: str, tf_state_bucket: str, tf_state_prefix: str, env_path: pathlib.Path +): + if not tf_state_bucket: + return + + contents = apply_substitutions_to_template( + TEMPLATE_PATHS["backend"], + { + "tf_state_backend": tf_state_bucket, + "tf_state_prefix": tf_state_prefix, + }, + ) + + create_file_in_dot_and_project_dirs(dataset_id, contents, "backend.tf", env_path) + + def generate_dataset_tf(dataset_id: str, project_id: str, config: dict, env: str): subs = {"project_id": project_id, "dataset_id": dataset_id, "env": env} @@ -304,6 +324,19 @@ def apply_substitutions_to_template(template: pathlib.Path, subs: dict) -> str: dest="bucket_name_prefix", help="The prefix to use for GCS bucket names for global uniqueness", ) + parser.add_argument( + "--tf-state-bucket", + type=str, + dest="tf_state_bucket", + help="The GCS bucket name for the Terraform remote state", + ) + parser.add_argument( + "--tf-state-prefix", + type=str, + default="terraform/state", + dest="tf_state_prefix", + help="The GCS bucket prefix for the Terraform remote state", + ) parser.add_argument( "-e", "--env", @@ -336,5 +369,7 @@ def apply_substitutions_to_template(template: pathlib.Path, subs: dict) -> str: args.region, args.impersonating_acct, args.env, + args.tf_state_bucket, + args.tf_state_prefix, args.tf_apply, ) From 15ad8f5b40d44bb9efd5480a655a64e287a4aa14 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Wed, 26 May 2021 15:54:31 -0400 Subject: [PATCH 4/8] refactor create file function to be selective of dir tree --- scripts/generate_terraform.py | 36 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/scripts/generate_terraform.py b/scripts/generate_terraform.py index 3f47b7f65..4e6997e48 100644 --- a/scripts/generate_terraform.py +++ b/scripts/generate_terraform.py @@ -90,7 +90,7 @@ def generate_provider_tf( }, ) - create_file_in_dot_and_project_dirs(dataset_id, contents, "provider.tf", env_path) + create_file_in_dir_tree(dataset_id, contents, "provider.tf", env_path) def generate_backend_tf( @@ -102,12 +102,14 @@ def generate_backend_tf( contents = apply_substitutions_to_template( TEMPLATE_PATHS["backend"], { - "tf_state_backend": tf_state_bucket, + "tf_state_bucket": tf_state_bucket, "tf_state_prefix": tf_state_prefix, }, ) - create_file_in_dot_and_project_dirs(dataset_id, contents, "backend.tf", env_path) + create_file_in_dir_tree( + dataset_id, contents, "backend.tf", env_path, use_project_dir=False + ) def generate_dataset_tf(dataset_id: str, project_id: str, config: dict, env: str): @@ -120,7 +122,7 @@ def generate_dataset_tf(dataset_id: str, project_id: str, config: dict, env: str for resource in config["resources"]: contents += tf_resource_contents(resource, copy.deepcopy(subs)) - create_file_in_dot_and_project_dirs( + create_file_in_dir_tree( dataset_id, contents, f"{dataset_id}_dataset.tf", PROJECT_ROOT / f".{env}" ) @@ -151,14 +153,14 @@ def generate_pipeline_tf( for resource in config["resources"]: contents += tf_resource_contents(resource, {**subs, **resource}) - create_file_in_dot_and_project_dirs( + create_file_in_dir_tree( dataset_id, contents, f"{pipeline_id}_pipeline.tf", env_path ) def generate_variables_tf(dataset_id: str, env_path: pathlib.Path): contents = pathlib.Path(TEMPLATE_PATHS["variables"]).read_text() - create_file_in_dot_and_project_dirs(dataset_id, contents, "variables.tf", env_path) + create_file_in_dir_tree(dataset_id, contents, "variables.tf", env_path) def generate_tfvars_file( @@ -240,14 +242,24 @@ def create_gitignored_env_path(dataset_id: str, env_path: pathlib.Path): (env_path / "datasets" / dataset_id).mkdir(parents=True, exist_ok=True) -def create_file_in_dot_and_project_dirs( - dataset_id: str, contents: str, filename: str, env_path: pathlib.Path +def create_file_in_dir_tree( + dataset_id: str, + contents: str, + filename: str, + env_path: pathlib.Path, + use_env_dir: bool = True, + use_project_dir: bool = True, ): filepaths = [] - for prefix in ( - env_path / "datasets" / dataset_id / "_terraform", - DATASETS_PATH / dataset_id / "_terraform", - ): + prefixes = [] + + if use_env_dir: + prefixes.append(env_path / "datasets" / dataset_id / "_terraform") + + if use_project_dir: + prefixes.append(DATASETS_PATH / dataset_id / "_terraform") + + for prefix in prefixes: if not prefix.exists(): prefix.mkdir(parents=True, exist_ok=True) From d62196be0eb8b32ed7e129e3ba1223f2dbf9bd55 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Wed, 26 May 2021 15:56:47 -0400 Subject: [PATCH 5/8] fixed backend.tf template with missing quotes --- templates/terraform/backend.tf.jinja2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/terraform/backend.tf.jinja2 b/templates/terraform/backend.tf.jinja2 index ec3a2ecbf..e0a26b879 100644 --- a/templates/terraform/backend.tf.jinja2 +++ b/templates/terraform/backend.tf.jinja2 @@ -17,7 +17,7 @@ terraform { backend "gcs" { - bucket = {{ tf_state_bucket }} - prefix = {{ tf_state_prefix }} + bucket = "{{ tf_state_bucket }}" + prefix = "{{ tf_state_prefix }}" } } From 6c2b3411cb0936a105718d58b209fe6b03f83190 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Wed, 26 May 2021 15:57:24 -0400 Subject: [PATCH 6/8] revised generate_terraform tests to account for remote state file --- tests/scripts/test_generate_terraform.py | 158 ++++++++++++++++++++++- 1 file changed, 156 insertions(+), 2 deletions(-) diff --git a/tests/scripts/test_generate_terraform.py b/tests/scripts/test_generate_terraform.py index 38d94979e..699e2610a 100644 --- a/tests/scripts/test_generate_terraform.py +++ b/tests/scripts/test_generate_terraform.py @@ -89,6 +89,16 @@ def bq_table_resource() -> dict: } +@pytest.fixture +def tf_state_bucket() -> str: + return "test-terraform-state-bucket" + + +@pytest.fixture +def tf_state_prefix() -> str: + return "test/terraform/state" + + @pytest.fixture def env() -> str: return "test" @@ -107,6 +117,8 @@ def test_main_generates_tf_files( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ): shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml") shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml") @@ -118,6 +130,8 @@ def test_main_generates_tf_files( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ) for path_prefix in ( @@ -129,6 +143,69 @@ def test_main_generates_tf_files( assert (path_prefix / f"{pipeline_path.name}_pipeline.tf").exists() assert (path_prefix / "variables.tf").exists() + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "terraform.tfvars" + ).exists() + + assert ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars" + ).exists() + + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "backend.tf" + ).exists() + + assert ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf" + ).exists() + + +def test_main_without_tf_remote_state_generates_tf_files_except_backend_tf( + dataset_path, + pipeline_path, + project_id, + bucket_name_prefix, + region, + impersonating_acct, + env, +): + shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml") + shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml") + + generate_terraform.main( + dataset_path.name, + project_id, + bucket_name_prefix, + region, + impersonating_acct, + env, + None, + None, + ) + + for path_prefix in ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform", + generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform", + ): + assert (path_prefix / "provider.tf").exists() + assert (path_prefix / f"{dataset_path.name}_dataset.tf").exists() + assert (path_prefix / f"{pipeline_path.name}_pipeline.tf").exists() + assert (path_prefix / "variables.tf").exists() + assert not (path_prefix / "backend.tf").exists() + + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "terraform.tfvars" + ).exists() + assert ( ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars" ).exists() @@ -146,6 +223,8 @@ def test_main_with_multiple_pipelines( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ): assert pipeline_path.name != pipeline_path_2.name @@ -160,6 +239,8 @@ def test_main_with_multiple_pipelines( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ) for path_prefix in ( @@ -172,13 +253,38 @@ def test_main_with_multiple_pipelines( assert (path_prefix / f"{pipeline_path_2.name}_pipeline.tf").exists() assert (path_prefix / "variables.tf").exists() + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "terraform.tfvars" + ).exists() + assert ( ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars" ).exists() + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "backend.tf" + ).exists() + + assert ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf" + ).exists() + def test_dataset_without_any_pipelines( - dataset_path, project_id, bucket_name_prefix, region, impersonating_acct, env + dataset_path, + project_id, + bucket_name_prefix, + region, + impersonating_acct, + env, + tf_state_bucket, + tf_state_prefix, ): shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml") @@ -189,6 +295,8 @@ def test_dataset_without_any_pipelines( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ) for path_prefix in ( @@ -198,9 +306,37 @@ def test_dataset_without_any_pipelines( assert (path_prefix / "provider.tf").exists() assert (path_prefix / f"{dataset_path.name}_dataset.tf").exists() + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "terraform.tfvars" + ).exists() + + assert ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars" + ).exists() + + assert not ( + generate_terraform.DATASETS_PATH + / dataset_path.name + / "_terraform" + / "backend.tf" + ).exists() + + assert ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf" + ).exists() + def test_dataset_path_does_not_exist( - project_id, bucket_name_prefix, region, impersonating_acct, env + project_id, + bucket_name_prefix, + region, + impersonating_acct, + env, + tf_state_bucket, + tf_state_prefix, ): with pytest.raises(FileNotFoundError): generate_terraform.main( @@ -210,6 +346,8 @@ def test_dataset_path_does_not_exist( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ) @@ -221,6 +359,8 @@ def test_generated_tf_files_contain_license_headers( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ): shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml") shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml") @@ -232,6 +372,8 @@ def test_generated_tf_files_contain_license_headers( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ) license_header = pathlib.Path( @@ -255,6 +397,10 @@ def test_generated_tf_files_contain_license_headers( ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "terraform.tfvars" ).read_text().count(license_header) == 1 + assert ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform" / "backend.tf" + ).read_text().count(license_header) == 1 + def test_bucket_names_must_not_contain_dots_and_google(): for name in ( @@ -283,6 +429,8 @@ def test_bucket_prefixes_must_use_hyphens_instead_of_underscores( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ): for prefix in ( "test_prefix", @@ -296,6 +444,8 @@ def test_bucket_prefixes_must_use_hyphens_instead_of_underscores( region, impersonating_acct, env, + tf_state_bucket, + tf_state_prefix, ) @@ -318,6 +468,8 @@ def test_validation_on_generated_tf_files_in_dot_env_dir( region, impersonating_acct, env, + None, + None, ) env_dataset_path = ENV_DATASETS_PATH / dataset_path.name @@ -346,6 +498,8 @@ def test_validation_on_generated_tf_files_in_project_dir( region, impersonating_acct, env, + None, + None, ) project_dataset_path = generate_terraform.DATASETS_PATH / dataset_path.name From 92054af0ebc5b62cbb8668047c7088c87b04f18b Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Wed, 26 May 2021 16:00:03 -0400 Subject: [PATCH 7/8] rerun generate terraform files for ml_datasets --- datasets/ml_datasets/_terraform/ml_datasets_dataset.tf | 2 +- datasets/ml_datasets/_terraform/penguins_pipeline.tf | 2 +- datasets/ml_datasets/_terraform/provider.tf | 7 ++++--- datasets/ml_datasets/_terraform/variables.tf | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf b/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf index cbc8728a4..c565a3f25 100644 --- a/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf +++ b/datasets/ml_datasets/_terraform/ml_datasets_dataset.tf @@ -18,7 +18,7 @@ resource "google_bigquery_dataset" "ml_datasets" { dataset_id = "ml_datasets" project = var.project_id - } +} output "bigquery_dataset-ml_datasets-dataset_id" { value = google_bigquery_dataset.ml_datasets.dataset_id diff --git a/datasets/ml_datasets/_terraform/penguins_pipeline.tf b/datasets/ml_datasets/_terraform/penguins_pipeline.tf index a61228ad6..88c5a1b19 100644 --- a/datasets/ml_datasets/_terraform/penguins_pipeline.tf +++ b/datasets/ml_datasets/_terraform/penguins_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "penguins" { dataset_id = "ml_datasets" table_id = "penguins" - + depends_on = [ google_bigquery_dataset.ml_datasets diff --git a/datasets/ml_datasets/_terraform/provider.tf b/datasets/ml_datasets/_terraform/provider.tf index fc45f218d..23ab87dcd 100644 --- a/datasets/ml_datasets/_terraform/provider.tf +++ b/datasets/ml_datasets/_terraform/provider.tf @@ -16,9 +16,10 @@ provider "google" { - project = var.project_id - region = var.region - } + project = var.project_id + impersonate_service_account = var.impersonating_acct + region = var.region +} data "google_client_openid_userinfo" "me" {} diff --git a/datasets/ml_datasets/_terraform/variables.tf b/datasets/ml_datasets/_terraform/variables.tf index 9fd0b3853..c3ec7c506 100644 --- a/datasets/ml_datasets/_terraform/variables.tf +++ b/datasets/ml_datasets/_terraform/variables.tf @@ -16,7 +16,7 @@ variable "project_id" {} -variable "project_num" {} +variable "bucket_name_prefix" {} variable "impersonating_acct" {} variable "region" {} variable "env" {} From f7c8af7296d3fbb62822757c229a5b5cb907fe19 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Wed, 26 May 2021 18:09:04 -0400 Subject: [PATCH 8/8] updated README for TF remote state usage --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index f613e3f3a..21069611d 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,8 @@ $ python scripts/generate_terraform.py \ --region REGION \ --bucket-name-prefix UNIQUE_BUCKET_PREFIX \ [--env] dev \ + [--tf-state-bucket] \ + [--tf-state-prefix] \ [--tf-apply] \ [--impersonating-acct] IMPERSONATING_SERVICE_ACCT ``` @@ -89,6 +91,8 @@ This generates Terraform files (`*.tf`) in a `_terraform` directory inside that The `--bucket-name-prefix` is used to ensure that the buckets created by different environments and contributors are kept unique. This is to satisfy the rule where bucket names must be globally unique across all of GCS. Use hyphenated names (`some-prefix-123`) instead of snakecase or underscores (`some_prefix_123`). +The `--tf-state-bucket` and `--tf-state-prefix` parameters can be optionally used if one needs to use a remote store for the Terraform state. This will create a `backend.tf` file that points to the GCS bucket and prefix to use in storing the Terraform state. For more info, see the [Terraform docs for using GCS backends](https://www.terraform.io/docs/language/settings/backends/gcs.html). + In addition, the command above creates a "dot" directory in the project root. The directory name is the value you pass to the `--env` parameter of the command. If no `--env` argument was passed, the value defaults to `dev` (which generates the `.dev` folder). Consider this "dot" directory as your own dedicated space for prototyping. The files and variables created in that directory will use an isolated environment. All such directories are gitignored.