From 7c0f339f20ca1384eab96a4a3f9cb784f63ab52d Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Tue, 8 Jun 2021 19:22:39 -0400 Subject: [PATCH] Fix: Terraform resource names can't start with digits, but BQ tables can (#70) * modified bq table jinja template to use tf_resource * resource names for BQ tables don't start with digits * tests for BQ table names that start with digits --- scripts/generate_terraform.py | 8 +++ .../terraform/google_bigquery_table.tf.jinja2 | 6 +- tests/scripts/test_generate_terraform.py | 67 +++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/scripts/generate_terraform.py b/scripts/generate_terraform.py index e090fb6dc..11648218a 100644 --- a/scripts/generate_terraform.py +++ b/scripts/generate_terraform.py @@ -203,6 +203,14 @@ def customize_template_subs(resource: dict, subs: dict) -> dict: subs["uniform_bucket_level_access"] = resource.get( "uniform_bucket_level_access" ) + elif resource["type"] == "bigquery_table": + # Terraform resource names cannot start with digits, but BigQuery allows + # table names that start with digits. We prepend `bqt_` to table names + # that doesn't comply with Terraform's naming rule. + if resource["table_id"][0].isdigit(): + subs["tf_resource_name"] = "bqt_" + resource["table_id"] + else: + subs["tf_resource_name"] = resource["table_id"] return subs diff --git a/templates/terraform/google_bigquery_table.tf.jinja2 b/templates/terraform/google_bigquery_table.tf.jinja2 index 43fede553..950f88613 100644 --- a/templates/terraform/google_bigquery_table.tf.jinja2 +++ b/templates/terraform/google_bigquery_table.tf.jinja2 @@ -15,7 +15,7 @@ */ -resource "google_bigquery_table" "{{ table_id }}" { +resource "google_bigquery_table" "{{ tf_resource_name }}" { project = var.project_id dataset_id = "{{ dataset_id }}" table_id = "{{ table_id }}" @@ -34,9 +34,9 @@ resource "google_bigquery_table" "{{ table_id }}" { } output "bigquery_table-{{ table_id }}-table_id" { - value = google_bigquery_table.{{ table_id }}.table_id + value = google_bigquery_table.{{ tf_resource_name }}.table_id } output "bigquery_table-{{ table_id }}-id" { - value = google_bigquery_table.{{ table_id }}.id + value = google_bigquery_table.{{ tf_resource_name }}.id } diff --git a/tests/scripts/test_generate_terraform.py b/tests/scripts/test_generate_terraform.py index 10dc5e2af..505bea234 100644 --- a/tests/scripts/test_generate_terraform.py +++ b/tests/scripts/test_generate_terraform.py @@ -14,6 +14,7 @@ import pathlib +import random import re import shutil import subprocess @@ -584,6 +585,72 @@ def test_pipeline_tf_has_no_bq_table_description_when_unspecified( assert not re.search(r"description\s+\=", result.group(1)) +def test_bq_table_name_starts_with_digits_but_tf_resource_name_does_not( + dataset_path, + pipeline_path, + project_id, + bucket_name_prefix, + region, + impersonating_acct, + env, +): + shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml") + shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml") + + config = yaml.load(open(pipeline_path / "pipeline.yaml")) + table_name_starting_with_digit = f"{str(random.randint(0, 9))}_table" + + # In the YAML config, set the BigQuery table name to start with a digit + bq_table = next( + (r for r in config["resources"] if r["type"] == "bigquery_table"), None + ) + bq_table["table_id"] = table_name_starting_with_digit + with open(pipeline_path / "pipeline.yaml", "w") as file: + yaml.dump(config, file) + + generate_terraform.main( + dataset_path.name, + project_id, + bucket_name_prefix, + region, + impersonating_acct, + env, + None, + None, + ) + + # Match the Terraform resource name and the table_id value in the BigQuery + # table's resource definition. As a concrete example, substrings in + # ALL_CAPS are matched below: + # + # resource "google_bigquery_table" "RESOURCE_NAME_STARTING_WITH_NONDIGIT" { + # description = "" + # table_id = "TABLE_NAME_STARTING_WITH_DIGIT" + # } + tf_resource_regexp = r"\"google_bigquery_table\" \"([a-zA-Z0-9_-]+)\" .*?" + table_id_regexp = r"table_id\s+\= \"(.*?)\"\n" + matcher = re.compile( + tf_resource_regexp + table_id_regexp, + flags=re.MULTILINE | re.DOTALL, + ) + + for path_prefix in ( + ENV_DATASETS_PATH / dataset_path.name / "_terraform", + generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform", + ): + result = matcher.search( + (path_prefix / f"{pipeline_path.name}_pipeline.tf").read_text() + ) + + tf_resource_name = result.group(1) + table_id = result.group(2) + + assert table_id == table_name_starting_with_digit + assert not tf_resource_name[0].isdigit() + assert table_id[0].isdigit() + assert table_id in tf_resource_name + + def test_bucket_names_must_not_contain_dots_and_google(): for name in ( "test.bucket.name",