Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Terraform resource names can't start with digits, but BQ tables can #70

Merged
merged 3 commits into from Jun 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions scripts/generate_terraform.py
Expand Up @@ -203,6 +203,14 @@ def customize_template_subs(resource: dict, subs: dict) -> dict:
subs["uniform_bucket_level_access"] = resource.get(
"uniform_bucket_level_access"
)
elif resource["type"] == "bigquery_table":
# Terraform resource names cannot start with digits, but BigQuery allows
# table names that start with digits. We prepend `bqt_` to table names
# that don't comply with Terraform's naming rule.
if resource["table_id"][0].isdigit():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL isdigit() 😁

subs["tf_resource_name"] = "bqt_" + resource["table_id"]
else:
subs["tf_resource_name"] = resource["table_id"]
return subs


Expand Down
6 changes: 3 additions & 3 deletions templates/terraform/google_bigquery_table.tf.jinja2
Expand Up @@ -15,7 +15,7 @@
*/


resource "google_bigquery_table" "{{ table_id }}" {
resource "google_bigquery_table" "{{ tf_resource_name }}" {
project = var.project_id
dataset_id = "{{ dataset_id }}"
table_id = "{{ table_id }}"
Expand All @@ -34,9 +34,9 @@ resource "google_bigquery_table" "{{ table_id }}" {
}

output "bigquery_table-{{ table_id }}-table_id" {
value = google_bigquery_table.{{ table_id }}.table_id
value = google_bigquery_table.{{ tf_resource_name }}.table_id
}

output "bigquery_table-{{ table_id }}-id" {
value = google_bigquery_table.{{ table_id }}.id
value = google_bigquery_table.{{ tf_resource_name }}.id
}
67 changes: 67 additions & 0 deletions tests/scripts/test_generate_terraform.py
Expand Up @@ -14,6 +14,7 @@


import pathlib
import random
import re
import shutil
import subprocess
Expand Down Expand Up @@ -584,6 +585,72 @@ def test_pipeline_tf_has_no_bq_table_description_when_unspecified(
assert not re.search(r"description\s+\=", result.group(1))


def test_bq_table_name_starts_with_digits_but_tf_resource_name_does_not(
    dataset_path,
    pipeline_path,
    project_id,
    bucket_name_prefix,
    region,
    impersonating_acct,
    env,
):
    """Check that a digit-leading BigQuery table ID gets a non-digit-leading
    Terraform resource name.

    The sample pipeline config is rewritten so that its first
    ``bigquery_table`` resource has a ``table_id`` beginning with a digit.
    After running ``generate_terraform.main``, every generated
    ``*_pipeline.tf`` file must keep the original ``table_id`` value while
    using a Terraform resource name that does not start with a digit and
    still contains the table ID.
    """
    shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
    shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")

    pipeline_yaml_path = pipeline_path / "pipeline.yaml"

    # Read through a context manager so the file handle is closed before the
    # same path is reopened for writing below.
    with open(pipeline_yaml_path) as file:
        config = yaml.load(file)
    table_name_starting_with_digit = f"{random.randint(0, 9)}_table"

    # In the YAML config, set the BigQuery table name to start with a digit
    bq_table = next(
        (r for r in config["resources"] if r["type"] == "bigquery_table"), None
    )
    # Fail with a clear message rather than a TypeError on `None[...]`.
    assert (
        bq_table is not None
    ), "sample pipeline.yaml does not define a bigquery_table resource"
    bq_table["table_id"] = table_name_starting_with_digit
    with open(pipeline_yaml_path, "w") as file:
        yaml.dump(config, file)

    generate_terraform.main(
        dataset_path.name,
        project_id,
        bucket_name_prefix,
        region,
        impersonating_acct,
        env,
        None,
        None,
    )

    # Match the Terraform resource name and the table_id value in the BigQuery
    # table's resource definition. As a concrete example, substrings in
    # ALL_CAPS are matched below:
    #
    # resource "google_bigquery_table" "RESOURCE_NAME_STARTING_WITH_NONDIGIT" {
    #   description = ""
    #   table_id = "TABLE_NAME_STARTING_WITH_DIGIT"
    # }
    tf_resource_regexp = r"\"google_bigquery_table\" \"([a-zA-Z0-9_-]+)\" .*?"
    table_id_regexp = r"table_id\s+\= \"(.*?)\"\n"
    matcher = re.compile(
        tf_resource_regexp + table_id_regexp,
        flags=re.MULTILINE | re.DOTALL,
    )

    # Both generated copies of the pipeline's Terraform file are verified.
    for path_prefix in (
        ENV_DATASETS_PATH / dataset_path.name / "_terraform",
        generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform",
    ):
        tf_file = path_prefix / f"{pipeline_path.name}_pipeline.tf"
        result = matcher.search(tf_file.read_text())
        # Surface a missing resource block as an assertion failure instead of
        # an AttributeError on `None.group`.
        assert (
            result is not None
        ), f"no google_bigquery_table resource block found in {tf_file}"

        tf_resource_name = result.group(1)
        table_id = result.group(2)

        assert table_id == table_name_starting_with_digit
        assert not tf_resource_name[0].isdigit()
        assert table_id[0].isdigit()
        assert table_id in tf_resource_name


def test_bucket_names_must_not_contain_dots_and_google():
for name in (
"test.bucket.name",
Expand Down