diff --git a/datasets/san_francisco_311_service_requests/311_service_requests/311_service_requests_dag.py b/datasets/san_francisco_311/311_service_requests/311_service_requests_dag.py similarity index 93% rename from datasets/san_francisco_311_service_requests/311_service_requests/311_service_requests_dag.py rename to datasets/san_francisco_311/311_service_requests/311_service_requests_dag.py index d5014660f..a75cddf01 100644 --- a/datasets/san_francisco_311_service_requests/311_service_requests/311_service_requests_dag.py +++ b/datasets/san_francisco_311/311_service_requests/311_service_requests_dag.py @@ -25,7 +25,7 @@ with DAG( - dag_id="san_francisco_311_service_requests.311_service_requests", + dag_id="san_francisco_311.311_service_requests", default_args=default_args, max_active_runs=1, schedule_interval="@daily", @@ -56,14 +56,14 @@ } }, image_pull_policy="Always", - image="{{ var.json.san_francisco_311_service_requests.container_registry.run_csv_transform_kub }}", + image="{{ var.json.san_francisco_311.container_registry.run_csv_transform_kub }}", env_vars={ "SOURCE_URL": "https://data.sfgov.org/api/views/vw6y-z8j6/rows.csv", "SOURCE_FILE": "files/data.csv", "TARGET_FILE": "files/data_output.csv", "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", - "TARGET_GCS_PATH": "data/san_francisco_311_service_requests/311_service_requests/data_output.csv", + "TARGET_GCS_PATH": "data/san_francisco_311/311_service_requests/data_output.csv", }, resources={"limit_memory": "8G", "limit_cpu": "3"}, ) @@ -72,11 +72,9 @@ load_to_bq = gcs_to_bigquery.GCSToBigQueryOperator( task_id="load_to_bq", bucket="{{ var.value.composer_bucket }}", - source_objects=[ - "data/san_francisco_311_service_requests/311_service_requests/data_output.csv" - ], + source_objects=["data/san_francisco_311/311_service_requests/data_output.csv"], source_format="CSV", - destination_project_dataset_table="san_francisco_311_service_requests.311_service_requests", + destination_project_dataset_table="san_francisco_311.311_service_requests", skip_leading_rows=1, allow_quoted_newlines=True, write_disposition="WRITE_TRUNCATE", diff --git a/datasets/san_francisco_311_service_requests/311_service_requests/pipeline.yaml b/datasets/san_francisco_311/311_service_requests/pipeline.yaml similarity index 92% rename from datasets/san_francisco_311_service_requests/311_service_requests/pipeline.yaml rename to datasets/san_francisco_311/311_service_requests/pipeline.yaml index dde4fcb25..783b87882 100644 --- a/datasets/san_francisco_311_service_requests/311_service_requests/pipeline.yaml +++ b/datasets/san_francisco_311/311_service_requests/pipeline.yaml @@ -17,7 +17,7 @@ resources: - type: bigquery_table table_id: "311_service_requests" - description: "san_francisco_311_service_requests" + description: "san_francisco_311" dag: airflow_version: 2 @@ -52,14 +52,14 @@ dag: values: - "pool-e2-standard-4" image_pull_policy: "Always" - image: "{{ var.json.san_francisco_311_service_requests.container_registry.run_csv_transform_kub }}" + image: "{{ var.json.san_francisco_311.container_registry.run_csv_transform_kub }}" env_vars: SOURCE_URL: "https://data.sfgov.org/api/views/vw6y-z8j6/rows.csv" SOURCE_FILE: "files/data.csv" TARGET_FILE: "files/data_output.csv" CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" - TARGET_GCS_PATH: "data/san_francisco_311_service_requests/311_service_requests/data_output.csv" + TARGET_GCS_PATH: "data/san_francisco_311/311_service_requests/data_output.csv" resources: limit_memory: "8G" limit_cpu: "3" @@ -70,9 +70,9 @@ dag: args: task_id: "load_to_bq" bucket: "{{ var.value.composer_bucket }}" - source_objects: ["data/san_francisco_311_service_requests/311_service_requests/data_output.csv"] + source_objects: ["data/san_francisco_311/311_service_requests/data_output.csv"] source_format: "CSV" - destination_project_dataset_table: "san_francisco_311_service_requests.311_service_requests" + destination_project_dataset_table: "san_francisco_311.311_service_requests" skip_leading_rows: 1 allow_quoted_newlines: True write_disposition: "WRITE_TRUNCATE" diff --git a/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/Dockerfile b/datasets/san_francisco_311/_images/run_csv_transform_kub/Dockerfile similarity index 100% rename from datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/Dockerfile rename to datasets/san_francisco_311/_images/run_csv_transform_kub/Dockerfile diff --git a/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/csv_transform.py b/datasets/san_francisco_311/_images/run_csv_transform_kub/csv_transform.py similarity index 100% rename from datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/csv_transform.py rename to datasets/san_francisco_311/_images/run_csv_transform_kub/csv_transform.py diff --git a/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/requirements.txt b/datasets/san_francisco_311/_images/run_csv_transform_kub/requirements.txt similarity index 100% rename from datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/requirements.txt rename to datasets/san_francisco_311/_images/run_csv_transform_kub/requirements.txt diff --git a/datasets/san_francisco_311_service_requests/_terraform/311_service_requests_pipeline.tf b/datasets/san_francisco_311/_terraform/311_service_requests_pipeline.tf similarity index 57% rename from datasets/san_francisco_311_service_requests/_terraform/311_service_requests_pipeline.tf rename to datasets/san_francisco_311/_terraform/311_service_requests_pipeline.tf index 91e6699a6..73b81c57e 100644 --- a/datasets/san_francisco_311_service_requests/_terraform/311_service_requests_pipeline.tf +++ b/datasets/san_francisco_311/_terraform/311_service_requests_pipeline.tf @@ -15,25 +15,25 @@ */ -resource "google_bigquery_table" "bqt_311_service_requests" { +resource "google_bigquery_table" "bqt_san_francisco_311_311_service_requests" { project = var.project_id - dataset_id = "san_francisco_311_service_requests" + dataset_id = "san_francisco_311" table_id = "311_service_requests" - description = "san_francisco_311_service_requestsspc" + description = "san_francisco_311" depends_on = [ - google_bigquery_dataset.san_francisco_311_service_requests + google_bigquery_dataset.san_francisco_311 ] } -output "bigquery_table-311_service_requests-table_id" { - value = google_bigquery_table.bqt_311_service_requests.table_id +output "bigquery_table-bqt_san_francisco_311_311_service_requests-table_id" { + value = google_bigquery_table.bqt_san_francisco_311_311_service_requests.table_id } -output "bigquery_table-311_service_requests-id" { - value = google_bigquery_table.bqt_311_service_requests.id +output "bigquery_table-bqt_san_francisco_311_311_service_requests-id" { + value = google_bigquery_table.bqt_san_francisco_311_311_service_requests.id } diff --git a/datasets/san_francisco_311_service_requests/_terraform/provider.tf b/datasets/san_francisco_311/_terraform/provider.tf similarity index 100% rename from datasets/san_francisco_311_service_requests/_terraform/provider.tf rename to datasets/san_francisco_311/_terraform/provider.tf diff --git a/datasets/san_francisco_311/_terraform/san_francisco_311_dataset.tf b/datasets/san_francisco_311/_terraform/san_francisco_311_dataset.tf new file mode 100644 index 000000000..a66130727 --- /dev/null +++ b/datasets/san_francisco_311/_terraform/san_francisco_311_dataset.tf @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +resource "google_bigquery_dataset" "san_francisco_311" { + dataset_id = "san_francisco_311" + project = var.project_id + description = "San Francisco 311 Service Requests" +} + +output "bigquery_dataset-san_francisco_311-dataset_id" { + value = google_bigquery_dataset.san_francisco_311.dataset_id +} diff --git a/datasets/san_francisco_311_service_requests/_terraform/san_francisco_311_service_requests_dataset.tf b/datasets/san_francisco_311/_terraform/san_francisco_311_service_requests_dataset.tf similarity index 100% rename from datasets/san_francisco_311_service_requests/_terraform/san_francisco_311_service_requests_dataset.tf rename to datasets/san_francisco_311/_terraform/san_francisco_311_service_requests_dataset.tf diff --git a/datasets/san_francisco_311_service_requests/_terraform/variables.tf b/datasets/san_francisco_311/_terraform/variables.tf similarity index 100% rename from datasets/san_francisco_311_service_requests/_terraform/variables.tf rename to datasets/san_francisco_311/_terraform/variables.tf diff --git a/datasets/san_francisco_311_service_requests/dataset.yaml b/datasets/san_francisco_311/dataset.yaml similarity index 82% rename from datasets/san_francisco_311_service_requests/dataset.yaml rename to datasets/san_francisco_311/dataset.yaml index 6008b87f4..211ef932f 100644 --- a/datasets/san_francisco_311_service_requests/dataset.yaml +++ b/datasets/san_francisco_311/dataset.yaml @@ -13,7 +13,7 @@ # limitations under the License. dataset: - name: san_francisco_311_service_requests + name: san_francisco_311 friendly_name: ~ description: ~ dataset_sources: ~ @@ -23,5 +23,5 @@ dataset: resources: - type: bigquery_dataset - dataset_id: san_francisco_311_service_requests - description: san_francisco_311_service_requests + dataset_id: san_francisco_311 + description: San Francisco 311 Service Requests