Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support BigQuery table descriptions #59

Merged
merged 6 commits into from Jun 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions samples/pipeline.yaml
Expand Up @@ -32,6 +32,9 @@ resources:
# table_id
table_id: PIPELINE_FOLDER_NAME

# Description of the table
description: "This is a table description."

dag:
# The DAG acronym stands for directed acyclic graph. This block represents
# your data pipeline along with every property and configuration it needs to
Expand Down
4 changes: 3 additions & 1 deletion templates/terraform/google_bigquery_table.tf.jinja2
Expand Up @@ -20,12 +20,14 @@ resource "google_bigquery_table" "{{ table_id }}" {
dataset_id = "{{ dataset_id }}"
table_id = "{{ table_id }}"

{% if description -%}
description = "{{ description }}"
{%- endif %}
{% if schema -%}
schema = <<EOF
{{ schema }}
EOF
{%- endif %}

depends_on = [
google_bigquery_dataset.{{ dataset_id }}
]
Expand Down
140 changes: 140 additions & 0 deletions tests/scripts/test_generate_terraform.py
Expand Up @@ -14,6 +14,7 @@


import pathlib
import re
import shutil
import subprocess
import tempfile
Expand Down Expand Up @@ -444,6 +445,145 @@ def test_dataset_tf_file_contains_description_when_specified(
) == 1


def test_dataset_tf_has_no_bq_dataset_description_when_unspecified(
    dataset_path,
    pipeline_path,
    project_id,
    bucket_name_prefix,
    region,
    impersonating_acct,
    env,
):
    """Check that no `description` is rendered for a dataset that lacks one.

    Copies the sample dataset and pipeline YAML files into place, removes the
    `description` field from the first `bigquery_dataset` resource, runs
    `generate_terraform.main`, and then inspects the generated
    `<dataset>_dataset.tf` file under both output locations.

    All parameters are supplied by the test harness (presumably pytest
    fixtures defined elsewhere in the project).
    """
    shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
    shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")

    # Read through a context manager so the file handle is closed before the
    # same path is reopened for writing below.
    with open(dataset_path / "dataset.yaml") as file:
        config = yaml.load(file)

    # Get the first bigquery_dataset resource and delete the `description` field
    bq_dataset = next(
        (r for r in config["resources"] if r["type"] == "bigquery_dataset"), None
    )
    assert bq_dataset, "sample dataset.yaml has no bigquery_dataset resource"
    del bq_dataset["description"]
    with open(dataset_path / "dataset.yaml", "w") as file:
        yaml.dump(config, file)

    generate_terraform.main(
        dataset_path.name,
        project_id,
        bucket_name_prefix,
        region,
        impersonating_acct,
        env,
        None,
        None,
    )

    # Match the "google_bigquery_dataset" properties, i.e. any lines between the
    # curly braces, in the *_dataset.tf file. The directory name is escaped so
    # that it is always matched literally.
    regexp = (
        r"\"google_bigquery_dataset\" \""
        + re.escape(dataset_path.name)
        + r"\" \{(.*?)\}"
    )
    bq_dataset_tf_string = re.compile(regexp, flags=re.MULTILINE | re.DOTALL)

    for path_prefix in (
        ENV_DATASETS_PATH / dataset_path.name / "_terraform",
        generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform",
    ):
        tf_path = path_prefix / f"{dataset_path.name}_dataset.tf"
        result = bq_dataset_tf_string.search(tf_path.read_text())

        # Fail with a readable message rather than an AttributeError on None.
        assert result, f"no google_bigquery_dataset resource found in {tf_path}"
        assert re.search(r"dataset_id\s+\=", result.group(1))
        assert not re.search(r"description\s+\=", result.group(1))


def test_pipeline_tf_contains_bq_table_description_when_specified(
    dataset_path,
    pipeline_path,
    project_id,
    bucket_name_prefix,
    region,
    impersonating_acct,
    env,
):
    """Check that a table's `description` is rendered exactly once.

    Copies the sample dataset and pipeline YAML files into place, runs
    `generate_terraform.main`, and verifies that the generated
    `<pipeline>_pipeline.tf` file under both output locations contains one
    `description = "..."` line matching the first `bigquery_table` resource
    in the pipeline YAML.

    All parameters are supplied by the test harness (presumably pytest
    fixtures defined elsewhere in the project).
    """
    shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
    shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")

    generate_terraform.main(
        dataset_path.name,
        project_id,
        bucket_name_prefix,
        region,
        impersonating_acct,
        env,
        None,
        None,
    )

    # Use a context manager so the file handle is closed deterministically.
    with open(pipeline_path / "pipeline.yaml") as file:
        config = yaml.load(file)
    bq_table = next(
        (r for r in config["resources"] if r["type"] == "bigquery_table"), None
    )
    assert bq_table
    assert bq_table["description"]

    expected_line = f"description = \"{bq_table['description']}\""
    for path_prefix in (
        ENV_DATASETS_PATH / dataset_path.name / "_terraform",
        generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform",
    ):
        tf_text = (path_prefix / f"{pipeline_path.name}_pipeline.tf").read_text()
        assert tf_text.count(expected_line) == 1


def test_pipeline_tf_has_no_bq_table_description_when_unspecified(
    dataset_path,
    pipeline_path,
    project_id,
    bucket_name_prefix,
    region,
    impersonating_acct,
    env,
):
    """Check that no `description` is rendered for a table that lacks one.

    Copies the sample dataset and pipeline YAML files into place, removes the
    `description` field from the first `bigquery_table` resource, runs
    `generate_terraform.main`, and then inspects the generated
    `<pipeline>_pipeline.tf` file under both output locations.

    All parameters are supplied by the test harness (presumably pytest
    fixtures defined elsewhere in the project).
    """
    shutil.copyfile(SAMPLE_YAML_PATHS["dataset"], dataset_path / "dataset.yaml")
    shutil.copyfile(SAMPLE_YAML_PATHS["pipeline"], pipeline_path / "pipeline.yaml")

    # Read through a context manager so the file handle is closed before the
    # same path is reopened for writing below.
    with open(pipeline_path / "pipeline.yaml") as file:
        config = yaml.load(file)

    # Get the first bigquery_table resource and delete the `description` field
    bq_table = next(
        (r for r in config["resources"] if r["type"] == "bigquery_table"), None
    )
    # Guard the lookup: deleting a key from None would raise a confusing
    # TypeError instead of pointing at the missing resource.
    assert bq_table, "sample pipeline.yaml has no bigquery_table resource"
    del bq_table["description"]
    with open(pipeline_path / "pipeline.yaml", "w") as file:
        yaml.dump(config, file)

    generate_terraform.main(
        dataset_path.name,
        project_id,
        bucket_name_prefix,
        region,
        impersonating_acct,
        env,
        None,
        None,
    )

    # Match the "google_bigquery_table" properties, i.e. any lines between the
    # curly braces, in the *_pipeline.tf file. The table ID is escaped so that
    # it is always matched literally.
    regexp = (
        r"\"google_bigquery_table\" \""
        + re.escape(bq_table["table_id"])
        + r"\" \{(.*?)\}"
    )
    bq_table_tf_string = re.compile(regexp, flags=re.MULTILINE | re.DOTALL)

    for path_prefix in (
        ENV_DATASETS_PATH / dataset_path.name / "_terraform",
        generate_terraform.DATASETS_PATH / dataset_path.name / "_terraform",
    ):
        tf_path = path_prefix / f"{pipeline_path.name}_pipeline.tf"
        result = bq_table_tf_string.search(tf_path.read_text())

        # Fail with a readable message rather than an AttributeError on None.
        assert result, f"no google_bigquery_table resource found in {tf_path}"
        assert re.search(r"table_id\s+\=", result.group(1))
        assert not re.search(r"description\s+\=", result.group(1))


def test_bucket_names_must_not_contain_dots_and_google():
for name in (
"test.bucket.name",
Expand Down