diff --git a/datasets/sunroof/_terraform/sunroof_dataset.tf b/datasets/sunroof/_terraform/sunroof_dataset.tf deleted file mode 100644 index 68d241622..000000000 --- a/datasets/sunroof/_terraform/sunroof_dataset.tf +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Copyright 2021 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -resource "google_bigquery_dataset" "sunroof" { - dataset_id = "sunroof" - project = var.project_id - description = "sunroof" -} - -output "bigquery_dataset-sunroof-dataset_id" { - value = google_bigquery_dataset.sunroof.dataset_id -} diff --git a/datasets/sunroof/_images/run_csv_transform_kub/Dockerfile b/datasets/sunroof_solar/_images/run_csv_transform_kub/Dockerfile similarity index 100% rename from datasets/sunroof/_images/run_csv_transform_kub/Dockerfile rename to datasets/sunroof_solar/_images/run_csv_transform_kub/Dockerfile diff --git a/datasets/sunroof/_images/run_csv_transform_kub/csv_transform.py b/datasets/sunroof_solar/_images/run_csv_transform_kub/csv_transform.py similarity index 100% rename from datasets/sunroof/_images/run_csv_transform_kub/csv_transform.py rename to datasets/sunroof_solar/_images/run_csv_transform_kub/csv_transform.py diff --git a/datasets/sunroof/_images/run_csv_transform_kub/requirements.txt b/datasets/sunroof_solar/_images/run_csv_transform_kub/requirements.txt similarity index 100% rename from datasets/sunroof/_images/run_csv_transform_kub/requirements.txt rename to 
datasets/sunroof_solar/_images/run_csv_transform_kub/requirements.txt diff --git a/datasets/sunroof/_terraform/provider.tf b/datasets/sunroof_solar/_terraform/provider.tf similarity index 100% rename from datasets/sunroof/_terraform/provider.tf rename to datasets/sunroof_solar/_terraform/provider.tf diff --git a/datasets/sunroof/_terraform/solar_potential_by_census_tract_pipeline.tf b/datasets/sunroof_solar/_terraform/solar_potential_by_censustract_pipeline.tf similarity index 57% rename from datasets/sunroof/_terraform/solar_potential_by_census_tract_pipeline.tf rename to datasets/sunroof_solar/_terraform/solar_potential_by_censustract_pipeline.tf index b8eecce01..1c4896988 100644 --- a/datasets/sunroof/_terraform/solar_potential_by_census_tract_pipeline.tf +++ b/datasets/sunroof_solar/_terraform/solar_potential_by_censustract_pipeline.tf @@ -15,10 +15,10 @@ */ -resource "google_bigquery_table" "solar_potential_by_census_tract" { +resource "google_bigquery_table" "sunroof_solar_solar_potential_by_censustract" { project = var.project_id - dataset_id = "sunroof" - table_id = "solar_potential_by_census_tract" + dataset_id = "sunroof_solar" + table_id = "solar_potential_by_censustract" description = "Sunroof Solar Potential By Census Tract" @@ -26,14 +26,14 @@ resource "google_bigquery_table" "solar_potential_by_census_tract" { depends_on = [ - google_bigquery_dataset.sunroof + google_bigquery_dataset.sunroof_solar ] } -output "bigquery_table-solar_potential_by_census_tract-table_id" { - value = google_bigquery_table.solar_potential_by_census_tract.table_id +output "bigquery_table-sunroof_solar_solar_potential_by_censustract-table_id" { + value = google_bigquery_table.sunroof_solar_solar_potential_by_censustract.table_id } -output "bigquery_table-solar_potential_by_census_tract-id" { - value = google_bigquery_table.solar_potential_by_census_tract.id +output "bigquery_table-sunroof_solar_solar_potential_by_censustract-id" { + value = 
google_bigquery_table.sunroof_solar_solar_potential_by_censustract.id } diff --git a/datasets/sunroof/_terraform/solar_potential_by_postal_code_pipeline.tf b/datasets/sunroof_solar/_terraform/solar_potential_by_postal_code_pipeline.tf similarity index 61% rename from datasets/sunroof/_terraform/solar_potential_by_postal_code_pipeline.tf rename to datasets/sunroof_solar/_terraform/solar_potential_by_postal_code_pipeline.tf index 82040a0b6..3f14b3584 100644 --- a/datasets/sunroof/_terraform/solar_potential_by_postal_code_pipeline.tf +++ b/datasets/sunroof_solar/_terraform/solar_potential_by_postal_code_pipeline.tf @@ -15,9 +15,9 @@ */ -resource "google_bigquery_table" "solar_potential_by_postal_code" { +resource "google_bigquery_table" "sunroof_solar_solar_potential_by_postal_code" { project = var.project_id - dataset_id = "sunroof" + dataset_id = "sunroof_solar" table_id = "solar_potential_by_postal_code" description = "Sunroof Solar Potential By Postal Code" @@ -26,14 +26,14 @@ resource "google_bigquery_table" "solar_potential_by_postal_code" { depends_on = [ - google_bigquery_dataset.sunroof + google_bigquery_dataset.sunroof_solar ] } -output "bigquery_table-solar_potential_by_postal_code-table_id" { - value = google_bigquery_table.solar_potential_by_postal_code.table_id +output "bigquery_table-sunroof_solar_solar_potential_by_postal_code-table_id" { + value = google_bigquery_table.sunroof_solar_solar_potential_by_postal_code.table_id } -output "bigquery_table-solar_potential_by_postal_code-id" { - value = google_bigquery_table.solar_potential_by_postal_code.id +output "bigquery_table-sunroof_solar_solar_potential_by_postal_code-id" { + value = google_bigquery_table.sunroof_solar_solar_potential_by_postal_code.id } diff --git a/datasets/sunroof_solar/_terraform/sunroof_solar_dataset.tf b/datasets/sunroof_solar/_terraform/sunroof_solar_dataset.tf new file mode 100644 index 000000000..27b78b9ff --- /dev/null +++ 
b/datasets/sunroof_solar/_terraform/sunroof_solar_dataset.tf @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +resource "google_bigquery_dataset" "sunroof_solar" { + dataset_id = "sunroof_solar" + project = var.project_id + description = "Project Sunroof computes how much sunlight hits your roof in a year. Solar viability is determined using a methodology found here: https://www.google.com/get/sunroof/data-explorer/data-explorer-methodology.pdf The use of this data is subject to Google\u0027s Terms of Service. Feel free to include this data from Project Sunroof in other analyses, materials, reports, and communications with the following attribution: Source: Google Project Sunroof data explorer (August 2017)." 
+} + +output "bigquery_dataset-sunroof_solar-dataset_id" { + value = google_bigquery_dataset.sunroof_solar.dataset_id +} diff --git a/datasets/sunroof/_terraform/variables.tf b/datasets/sunroof_solar/_terraform/variables.tf similarity index 100% rename from datasets/sunroof/_terraform/variables.tf rename to datasets/sunroof_solar/_terraform/variables.tf diff --git a/datasets/sunroof/dataset.yaml b/datasets/sunroof_solar/dataset.yaml similarity index 52% rename from datasets/sunroof/dataset.yaml rename to datasets/sunroof_solar/dataset.yaml index 58de08fae..59973141e 100644 --- a/datasets/sunroof/dataset.yaml +++ b/datasets/sunroof_solar/dataset.yaml @@ -13,14 +13,15 @@ # limitations under the License. dataset: - name: sunroof - friendly_name: sunroof - description: sunroof + name: sunroof_solar + friendly_name: sunroof_solar + description: sunroof_solar dataset_sources: ~ terms_of_use: ~ resources: - type: bigquery_dataset - dataset_id: sunroof - description: sunroof + dataset_id: sunroof_solar + description: |- + Project Sunroof computes how much sunlight hits your roof in a year. Solar viability is determined using a methodology found here: https://www.google.com/get/sunroof/data-explorer/data-explorer-methodology.pdf The use of this data is subject to Google's Terms of Service. Feel free to include this data from Project Sunroof in other analyses, materials, reports, and communications with the following attribution: Source: Google Project Sunroof data explorer (August 2017). 
diff --git a/datasets/sunroof/solar_potential_by_census_tract/pipeline.yaml b/datasets/sunroof_solar/solar_potential_by_censustract/pipeline.yaml similarity index 96% rename from datasets/sunroof/solar_potential_by_census_tract/pipeline.yaml rename to datasets/sunroof_solar/solar_potential_by_censustract/pipeline.yaml index 2a552e2d3..e6bdb9acb 100644 --- a/datasets/sunroof/solar_potential_by_census_tract/pipeline.yaml +++ b/datasets/sunroof_solar/solar_potential_by_censustract/pipeline.yaml @@ -16,13 +16,13 @@ resources: - type: bigquery_table - table_id: "solar_potential_by_census_tract" + table_id: "solar_potential_by_censustract" description: "Sunroof Solar Potential By Census Tract" dag: airflow_version: 2 initialize: - dag_id: solar_potential_by_census_tract + dag_id: solar_potential_by_censustract default_args: owner: "Google" depends_on_past: False @@ -40,7 +40,7 @@ dag: args: task_id: "transform_csv" - name: "solar_potential_by_census_tract" + name: "solar_potential_by_censustract" namespace: "default" affinity: nodeAffinity: @@ -52,27 +52,27 @@ dag: values: - "pool-e2-standard-4" image_pull_policy: "Always" - image: "{{ var.json.sunroof.container_registry.run_csv_transform_kub }}" + image: "{{ var.json.sunroof_solar.container_registry.run_csv_transform_kub }}" env_vars: SOURCE_URL: "gs://project-sunroof/csv/latest/project-sunroof-census_tract.csv" SOURCE_FILE: "files/data.csv" TARGET_FILE: "files/data_output.csv" CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" - TARGET_GCS_PATH: "data/sunroof/solar_potential_by_census_tract/data_output.csv" + TARGET_GCS_PATH: "data/sunroof/solar_potential_by_censustract/data_output.csv" resources: limit_memory: "8G" limit_cpu: "3" - operator: "GoogleCloudStorageToBigQueryOperator" - description: "Task to load CSV data to a BigQuery table" + description: "Load CSV data to a BigQuery table" args: task_id: "load_to_bq" bucket: "{{ var.value.composer_bucket }}" - source_objects: 
["data/sunroof/solar_potential_by_census_tract/data_output.csv"] + source_objects: ["data/sunroof/solar_potential_by_censustract/data_output.csv"] source_format: "CSV" - destination_project_dataset_table: "sunroof.solar_potential_by_census_tract" + destination_project_dataset_table: "sunroof_solar.solar_potential_by_censustract" skip_leading_rows: 1 allow_quoted_newlines: True write_disposition: "WRITE_TRUNCATE" diff --git a/datasets/sunroof/solar_potential_by_census_tract/solar_potential_by_census_tract_dag.py b/datasets/sunroof_solar/solar_potential_by_censustract/solar_potential_by_censustract_dag.py similarity index 97% rename from datasets/sunroof/solar_potential_by_census_tract/solar_potential_by_census_tract_dag.py rename to datasets/sunroof_solar/solar_potential_by_censustract/solar_potential_by_censustract_dag.py index 7d211ef49..ed439c3d3 100644 --- a/datasets/sunroof/solar_potential_by_census_tract/solar_potential_by_census_tract_dag.py +++ b/datasets/sunroof_solar/solar_potential_by_censustract/solar_potential_by_censustract_dag.py @@ -25,7 +25,7 @@ with DAG( - dag_id="sunroof.solar_potential_by_census_tract", + dag_id="sunroof_solar.solar_potential_by_censustract", default_args=default_args, max_active_runs=1, schedule_interval="@daily", @@ -36,7 +36,7 @@ # Run CSV transform within kubernetes pod transform_csv = kubernetes_pod.KubernetesPodOperator( task_id="transform_csv", - name="solar_potential_by_census_tract", + name="solar_potential_by_censustract", namespace="default", affinity={ "nodeAffinity": { @@ -56,14 +56,14 @@ } }, image_pull_policy="Always", - image="{{ var.json.sunroof.container_registry.run_csv_transform_kub }}", + image="{{ var.json.sunroof_solar.container_registry.run_csv_transform_kub }}", env_vars={ "SOURCE_URL": "gs://project-sunroof/csv/latest/project-sunroof-census_tract.csv", "SOURCE_FILE": "files/data.csv", "TARGET_FILE": "files/data_output.csv", "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", - 
"TARGET_GCS_PATH": "data/sunroof/solar_potential_by_census_tract/data_output.csv", + "TARGET_GCS_PATH": "data/sunroof/solar_potential_by_censustract/data_output.csv", }, resources={"limit_memory": "8G", "limit_cpu": "3"}, ) @@ -72,9 +72,9 @@ load_to_bq = gcs_to_bigquery.GCSToBigQueryOperator( task_id="load_to_bq", bucket="{{ var.value.composer_bucket }}", - source_objects=["data/sunroof/solar_potential_by_census_tract/data_output.csv"], + source_objects=["data/sunroof/solar_potential_by_censustract/data_output.csv"], source_format="CSV", - destination_project_dataset_table="sunroof.solar_potential_by_census_tract", + destination_project_dataset_table="sunroof_solar.solar_potential_by_censustract", skip_leading_rows=1, allow_quoted_newlines=True, write_disposition="WRITE_TRUNCATE", diff --git a/datasets/sunroof/solar_potential_by_postal_code/pipeline.yaml b/datasets/sunroof_solar/solar_potential_by_postal_code/pipeline.yaml similarity index 99% rename from datasets/sunroof/solar_potential_by_postal_code/pipeline.yaml rename to datasets/sunroof_solar/solar_potential_by_postal_code/pipeline.yaml index fcf62e0a9..f789db5c4 100644 --- a/datasets/sunroof/solar_potential_by_postal_code/pipeline.yaml +++ b/datasets/sunroof_solar/solar_potential_by_postal_code/pipeline.yaml @@ -52,7 +52,7 @@ dag: values: - "pool-e2-standard-4" image_pull_policy: "Always" - image: "{{ var.json.sunroof.container_registry.run_csv_transform_kub }}" + image: "{{ var.json.sunroof_solar.container_registry.run_csv_transform_kub }}" env_vars: SOURCE_URL: "gs://project-sunroof/csv/latest/project-sunroof-postal_code.csv" SOURCE_FILE: "files/data.csv" diff --git a/datasets/sunroof/solar_potential_by_postal_code/solar_potential_by_postal_code_dag.py b/datasets/sunroof_solar/solar_potential_by_postal_code/solar_potential_by_postal_code_dag.py similarity index 98% rename from datasets/sunroof/solar_potential_by_postal_code/solar_potential_by_postal_code_dag.py rename to 
datasets/sunroof_solar/solar_potential_by_postal_code/solar_potential_by_postal_code_dag.py index 9fb1625b5..928893c7a 100644 --- a/datasets/sunroof/solar_potential_by_postal_code/solar_potential_by_postal_code_dag.py +++ b/datasets/sunroof_solar/solar_potential_by_postal_code/solar_potential_by_postal_code_dag.py @@ -25,7 +25,7 @@ with DAG( - dag_id="sunroof.solar_potential_by_postal_code", + dag_id="sunroof_solar.solar_potential_by_postal_code", default_args=default_args, max_active_runs=1, schedule_interval="@daily", @@ -56,7 +56,7 @@ } }, image_pull_policy="Always", - image="{{ var.json.sunroof.container_registry.run_csv_transform_kub }}", + image="{{ var.json.sunroof_solar.container_registry.run_csv_transform_kub }}", env_vars={ "SOURCE_URL": "gs://project-sunroof/csv/latest/project-sunroof-postal_code.csv", "SOURCE_FILE": "files/data.csv",