diff --git a/datasets/san_francisco_311_service_requests/311_service_requests/311_service_requests_dag.py b/datasets/san_francisco_311_service_requests/311_service_requests/311_service_requests_dag.py new file mode 100644 index 000000000..d5014660f --- /dev/null +++ b/datasets/san_francisco_311_service_requests/311_service_requests/311_service_requests_dag.py @@ -0,0 +1,201 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.providers.cncf.kubernetes.operators import kubernetes_pod +from airflow.providers.google.cloud.transfers import gcs_to_bigquery + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="san_francisco_311_service_requests.311_service_requests", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + transform_csv = kubernetes_pod.KubernetesPodOperator( + task_id="transform_csv", + name="311_service_requests", + namespace="default", + affinity={ + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "cloud.google.com/gke-nodepool", + "operator": "In", + "values": ["pool-e2-standard-4"], + } + ] + } + ] + } + } + }, + image_pull_policy="Always", + image="{{ 
var.json.san_francisco_311_service_requests.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://data.sfgov.org/api/views/vw6y-z8j6/rows.csv", + "SOURCE_FILE": "files/data.csv", + "TARGET_FILE": "files/data_output.csv", + "CHUNKSIZE": "750000", + "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", + "TARGET_GCS_PATH": "data/san_francisco_311_service_requests/311_service_requests/data_output.csv", + }, + resources={"limit_memory": "8G", "limit_cpu": "3"}, + ) + + # Task to load CSV data to a BigQuery table + load_to_bq = gcs_to_bigquery.GCSToBigQueryOperator( + task_id="load_to_bq", + bucket="{{ var.value.composer_bucket }}", + source_objects=[ + "data/san_francisco_311_service_requests/311_service_requests/data_output.csv" + ], + source_format="CSV", + destination_project_dataset_table="san_francisco_311_service_requests.311_service_requests", + skip_leading_rows=1, + allow_quoted_newlines=True, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "unique_key", + "type": "INTEGER", + "description": "Unique case id", + "mode": "REQUIRED", + }, + { + "name": "created_date", + "type": "TIMESTAMP", + "description": "The date and time when the service request was made", + "mode": "NULLABLE", + }, + { + "name": "closed_date", + "type": "TIMESTAMP", + "description": "The date and time when the service request was closed", + "mode": "NULLABLE", + }, + { + "name": "resolution_action_updated_date", + "type": "TIMESTAMP", + "description": "The date and time when the service request was last modified. 
For requests with status=closed, this will be the date the request was closed", + "mode": "NULLABLE", + }, + { + "name": "status", + "type": "STRING", + "description": "The current status of the service request.", + "mode": "NULLABLE", + }, + { + "name": "status_notes", + "type": "STRING", + "description": "Explanation of why status was changed to current state or more details on current status than conveyed with status alone", + "mode": "NULLABLE", + }, + { + "name": "agency_name", + "type": "STRING", + "description": "The agency responsible for fulfilling or otherwise addressing the service request.", + "mode": "NULLABLE", + }, + { + "name": "category", + "type": "STRING", + "description": "The Human readable name of the specific service request type (service_name)", + "mode": "NULLABLE", + }, + { + "name": "complaint_type", + "type": "STRING", + "description": "More specific description of the problem related to the Category", + "mode": "NULLABLE", + }, + { + "name": "descriptor", + "type": "STRING", + "description": "More specific description of the problem related to the Request Type", + "mode": "NULLABLE", + }, + { + "name": "incident_address", + "type": "STRING", + "description": "Human readable address or description of location", + "mode": "NULLABLE", + }, + { + "name": "supervisor_district", + "type": "INTEGER", + "description": "", + "mode": "NULLABLE", + }, + { + "name": "neighborhood", + "type": "STRING", + "description": "", + "mode": "NULLABLE", + }, + { + "name": "location", + "type": "STRING", + "description": "Latitude and longitude using the (WGS84) projection.", + "mode": "NULLABLE", + }, + { + "name": "source", + "type": "STRING", + "description": "How the service request was made", + "mode": "NULLABLE", + }, + { + "name": "media_url", + "type": "STRING", + "description": "Website URL", + "mode": "NULLABLE", + }, + { + "name": "latitude", + "type": "FLOAT", + "description": "Latitude using the (WGS84) projection.", + "mode": "NULLABLE", + }, + 
{ + "name": "longitude", + "type": "FLOAT", + "description": "Longitude using the (WGS84) projection.", + "mode": "NULLABLE", + }, + { + "name": "police_district", + "type": "STRING", + "description": "", + "mode": "NULLABLE", + }, + ], + ) + + transform_csv >> load_to_bq diff --git a/datasets/san_francisco_311_service_requests/311_service_requests/pipeline.yaml b/datasets/san_francisco_311_service_requests/311_service_requests/pipeline.yaml new file mode 100644 index 000000000..dde4fcb25 --- /dev/null +++ b/datasets/san_francisco_311_service_requests/311_service_requests/pipeline.yaml @@ -0,0 +1,159 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--- +resources: + + - type: bigquery_table + table_id: "311_service_requests" + description: "san_francisco_311_service_requests" + +dag: + airflow_version: 2 + initialize: + dag_id: 311_service_requests + default_args: + owner: "Google" + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + + - operator: "KubernetesPodOperator" + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "transform_csv" + name: "311_service_requests" + namespace: "default" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cloud.google.com/gke-nodepool + operator: In + values: + - "pool-e2-standard-4" + image_pull_policy: "Always" + image: "{{ var.json.san_francisco_311_service_requests.container_registry.run_csv_transform_kub }}" + env_vars: + SOURCE_URL: "https://data.sfgov.org/api/views/vw6y-z8j6/rows.csv" + SOURCE_FILE: "files/data.csv" + TARGET_FILE: "files/data_output.csv" + CHUNKSIZE: "750000" + TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" + TARGET_GCS_PATH: "data/san_francisco_311_service_requests/311_service_requests/data_output.csv" + resources: + limit_memory: "8G" + limit_cpu: "3" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_to_bq" + bucket: "{{ var.value.composer_bucket }}" + source_objects: ["data/san_francisco_311_service_requests/311_service_requests/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "san_francisco_311_service_requests.311_service_requests" + skip_leading_rows: 1 + allow_quoted_newlines: True + write_disposition: "WRITE_TRUNCATE" + schema_fields: + - name: "unique_key" + type: "INTEGER" + description: "Unique case id" + mode: "REQUIRED" + - name: "created_date" + type: "TIMESTAMP" + description: "The date and time when the 
service request was made" + mode: "NULLABLE" + - name: "closed_date" + type: "TIMESTAMP" + description: "The date and time when the service request was closed" + mode: "NULLABLE" + - name: "resolution_action_updated_date" + type: "TIMESTAMP" + description: "The date and time when the service request was last modified. For requests with status=closed, this will be the date the request was closed" + mode: "NULLABLE" + - name: "status" + type: "STRING" + description: "The current status of the service request." + mode: "NULLABLE" + - name: "status_notes" + type: "STRING" + description: "Explanation of why status was changed to current state or more details on current status than conveyed with status alone" + mode: "NULLABLE" + - name: "agency_name" + type: "STRING" + description: "The agency responsible for fulfilling or otherwise addressing the service request." + mode: "NULLABLE" + - name: "category" + type: "STRING" + description: "The Human readable name of the specific service request type (service_name)" + mode: "NULLABLE" + - name: "complaint_type" + type: "STRING" + description: "More specific description of the problem related to the Category" + mode: "NULLABLE" + - name: "descriptor" + type: "STRING" + description: "More specific description of the problem related to the Request Type" + mode: "NULLABLE" + - name: "incident_address" + type: "STRING" + description: "Human readable address or description of location" + mode: "NULLABLE" + - name: "supervisor_district" + type: "INTEGER" + description: "" + mode: "NULLABLE" + - name: "neighborhood" + type: "STRING" + description: "" + mode: "NULLABLE" + - name: "location" + type: "STRING" + description: "Latitude and longitude using the (WGS84) projection." 
+ mode: "NULLABLE" + - name: "source" + type: "STRING" + description: "How the service request was made" + mode: "NULLABLE" + - name: "media_url" + type: "STRING" + description: "Website URL" + mode: "NULLABLE" + - name: "latitude" + type: "FLOAT" + description: "Latitude using the (WGS84) projection." + mode: "NULLABLE" + - name: "longitude" + type: "FLOAT" + description: "Longitude using the (WGS84) projection." + mode: "NULLABLE" + - name: "police_district" + type: "STRING" + description: "" + mode: "NULLABLE" + + + graph_paths: + - "transform_csv >> load_to_bq" diff --git a/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/Dockerfile b/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/Dockerfile new file mode 100644 index 000000000..748bc3bec --- /dev/null +++ b/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/Dockerfile @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM python:3.8 +ENV PYTHONUNBUFFERED True +COPY requirements.txt ./ +RUN python3 -m pip install --no-cache-dir -r requirements.txt +WORKDIR /custom +COPY ./csv_transform.py . 
+CMD ["python3", "csv_transform.py"]
diff --git a/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/csv_transform.py b/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/csv_transform.py
new file mode 100644
index 000000000..063c9da7d
--- /dev/null
+++ b/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/csv_transform.py
@@ -0,0 +1,316 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import logging
+import os
+import pathlib
+import shutil
+
+import pandas as pd
+import requests
+from google.cloud import storage
+
+
+def main(
+    source_url: str,
+    source_file: pathlib.Path,
+    target_file: pathlib.Path,
+    chunksize: str,
+    target_gcs_bucket: str,
+    target_gcs_path: str,
+) -> None:
+    """Download the source CSV, transform it in chunks and upload the result.
+
+    Args:
+        source_url: URL the raw CSV is downloaded from.
+        source_file: Local path the raw CSV is saved to.
+        target_file: Local path of the combined, transformed CSV.
+        chunksize: Number of rows per pandas chunk, as a decimal string.
+        target_gcs_bucket: Name of the destination GCS bucket.
+        target_gcs_path: Object path of the uploaded file in that bucket.
+    """
+    logging.info("San Francisco - 311 Service Requests process started")
+
+    # Create the directories the configured paths actually point at rather
+    # than assuming both files live under a hard-coded "./files".
+    for local_path in (source_file, target_file):
+        pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)
+    download_file(source_url, source_file)
+
+    chunksz = int(chunksize)
+
+    logging.info(f"Opening source file {source_file}")
+    with pd.read_csv(
+        source_file,
+        engine="python",
+        encoding="utf-8",
+        quotechar='"',
+        sep=",",
+        chunksize=chunksz,
+    ) as reader:
+        for chunk_number, chunk in enumerate(reader):
+            target_file_batch = str(target_file).replace(
+                ".csv", "-" + str(chunk_number) + ".csv"
+            )
+            # Only the first chunk contributes the header row to the target file.
+            process_chunk(chunk, target_file_batch, target_file, chunk_number != 0)
+
+    upload_file_to_gcs(target_file, target_gcs_bucket, target_gcs_path)
+
+    logging.info("San Francisco - 311 Service Requests process completed")
+
+
+def download_file(source_url: str, source_file: pathlib.Path) -> None:
+    """Stream ``source_url`` into ``source_file``.
+
+    Raises:
+        requests.HTTPError: If the server answers with a 4xx/5xx status, so an
+            error page is never saved and later parsed as CSV.
+    """
+    logging.info(f"downloading file {source_file} from {source_url}")
+    with requests.get(source_url, stream=True) as r:
+        r.raise_for_status()
+        with open(source_file, "wb") as f:
+            # Iterating the response object itself yields 128-byte pieces;
+            # request larger chunks to keep the write loop cheap.
+            for chunk in r.iter_content(chunk_size=1024 * 1024):
+                f.write(chunk)
+
+
+def process_chunk(
+    df: pd.DataFrame, target_file_batch: str, target_file: str, skip_header: bool
+) -> None:
+    """Transform one chunk and append it to the combined target file.
+
+    Args:
+        df: Chunk as read from the source CSV.
+        target_file_batch: Path of the temporary per-chunk CSV.
+        target_file: Path of the combined output CSV.
+        skip_header: True to drop the chunk's header row when appending.
+    """
+    logging.info(f"Processing batch file {target_file_batch}")
+    df = rename_headers(df)
+    df = remove_empty_key_rows(df, "unique_key")
+    df = resolve_datatypes(df)
+    df = remove_parenthesis_long_lat(df)
+    df = strip_whitespace(df)
+    df = resolve_date_format(df)
+    df = reorder_headers(df)
+    save_to_new_file(df, file_path=str(target_file_batch))
+    append_batch_file(target_file_batch, target_file, skip_header)
+    logging.info(f"Processing batch file {target_file_batch} completed")
+
+
+def rename_headers(df: pd.DataFrame) -> pd.DataFrame:
+    """Rename the source CSV columns to the BigQuery schema field names."""
+    logging.info("Renaming headers")
+    header_names = {
+        "CaseID": "unique_key",
+        "Opened": "created_date",
+        "Closed": "closed_date",
+        "Updated": "resolution_action_updated_date",
+        "Status": "status",
+        "Status Notes": "status_notes",
+        "Responsible Agency": "agency_name",
+        "Category": "category",
+        "Request Type": "complaint_type",
+        "Request Details": "descriptor",
+        "Address": "incident_address",
+        "Supervisor District": "supervisor_district",
+        "Neighborhood": "neighborhood",
+        "Point": "location",
+        "Source": "source",
+        "Media URL": "media_url",
+        "Latitude": "latitude",
+        "Longitude": "longitude",
+        "Police District": "police_district",
+    }
+    df = df.rename(columns=header_names)
+
+    return df
+
+
+def remove_empty_key_rows(df: pd.DataFrame, key_field: str) -> pd.DataFrame:
+    """Return ``df`` without the rows whose ``key_field`` is missing or empty.
+
+    pandas reads an empty CSV cell as NaN, and ``NaN != ""`` is True, so the
+    missing-value test has to be explicit.
+    """
+    logging.info("Removing rows with empty keys")
+    df = df[df[key_field].notna() & (df[key_field] != "")]
+
+    return df
+
+
+def resolve_datatypes(df: pd.DataFrame) -> pd.DataFrame:
+    """Cast ``supervisor_district`` to the nullable ``Int64`` dtype."""
+    logging.info("Resolving datatypes")
+    df["supervisor_district"] = df["supervisor_district"].astype("Int64")
+
+    return df
+
+
+def _strip_parentheses(value):
+    """Return ``value`` without "(" / ")" if it is a string, else unchanged."""
+    if isinstance(value, str):
+        return value.replace("(", "").replace(")", "")
+    return value
+
+
+def remove_parenthesis_long_lat(df: pd.DataFrame) -> pd.DataFrame:
+    """Strip parentheses from string values of ``latitude`` and ``longitude``.
+
+    ``Series.replace("(", "", regex=False)`` only replaces cells that are
+    exactly "(", so the characters are removed per value instead.
+    """
+    logging.info("Removing parenthesis from latitude and longitude")
+    for col in ("latitude", "longitude"):
+        # Columns already parsed as numbers cannot contain parentheses.
+        if not pd.api.types.is_numeric_dtype(df[col]):
+            df[col] = df[col].map(_strip_parentheses)
+
+    return df
+
+
+def strip_whitespace(df: pd.DataFrame) -> pd.DataFrame:
+    """Strip leading/trailing whitespace from the ``incident_address`` values."""
+    logging.info("Stripping whitespace")
+    ws_fields = ["incident_address"]
+
+    for ws_fld in ws_fields:
+        # Only touch real strings: str(NaN).strip() would turn a missing
+        # address into the literal text "nan" in the output CSV.
+        df[ws_fld] = df[ws_fld].map(lambda x: x.strip() if isinstance(x, str) else x)
+
+    return df
+
+
+def resolve_date_format(df: pd.DataFrame) -> pd.DataFrame:
+    """Normalise the three date columns with ``convert_dt_format``."""
+    logging.info("Resolving date formats")
+    date_fields = [
+        "created_date",
+        "closed_date",
+        "resolution_action_updated_date",
+    ]
+
+    for dt_fld in date_fields:
+        df[dt_fld] = df[dt_fld].apply(convert_dt_format)
+
+    return df
+
+
+def convert_dt_format(dt_str: str) -> str:
+    """Convert ``mm/dd/yyyy hh:mm:ss AM|PM`` to ``yyyy-mm-dd HH:MM:SS``.
+
+    Missing values (None, empty, "nan", "nat") become "". Values without a
+    "/" in the third position are returned as stripped text.
+    """
+    if dt_str is None:
+        return ""
+    text = str(dt_str).strip()
+    if not text or text.lower() in ("nan", "nat"):
+        return ""
+    if len(text) > 2 and text[2] == "/":
+        # %I, not %H: strptime only honours %p when the hour is parsed as a
+        # 12-hour value, otherwise every PM time is stored 12 hours early.
+        return datetime.datetime.strptime(text, "%m/%d/%Y %I:%M:%S %p").strftime(
+            "%Y-%m-%d %H:%M:%S"
+        )
+    return text
+
+
+def reorder_headers(df: pd.DataFrame) -> pd.DataFrame:
+    """Select the output columns in the order of the BigQuery schema."""
+    logging.info("Reordering headers")
+    df = df[
+        [
+            "unique_key",
+            "created_date",
+            "closed_date",
+            "resolution_action_updated_date",
+            "status",
+            "status_notes",
+            "agency_name",
+            "category",
+            "complaint_type",
+            "descriptor",
+            "incident_address",
+            "supervisor_district",
+            "neighborhood",
+            "location",
+            "source",
+            "media_url",
+            "latitude",
+            "longitude",
+            "police_district",
+        ]
+    ]
+
+    return df
+
+
+def save_to_new_file(df: pd.DataFrame, file_path) -> None:
+    """Write ``df`` to ``file_path`` as CSV without the index column."""
+    df.to_csv(file_path, index=False)
+
+
+def append_batch_file(
+    batch_file_path: str, target_file_path: str, skip_header: bool
+) -> None:
+    """Append a batch CSV to the target file, then delete the batch file.
+
+    Args:
+        batch_file_path: Path of the per-chunk CSV to append.
+        target_file_path: Path of the combined CSV.
+        skip_header: True to skip the first line (header) of the batch file.
+            False marks the header-bearing first batch, which starts the
+            target file afresh so rows left by an earlier run cannot remain.
+    """
+    if skip_header:
+        logging.info(
+            f"Appending batch file {batch_file_path} to {target_file_path} with skip header"
+        )
+    else:
+        logging.info(f"Appending batch file {batch_file_path} to {target_file_path}")
+    with open(batch_file_path, "r") as data_file, open(
+        target_file_path, "a" if skip_header else "w"
+    ) as target_file:
+        if skip_header:
+            next(data_file, None)  # tolerate an empty batch file
+        # Stream the copy instead of holding the whole batch in memory.
+        shutil.copyfileobj(data_file, target_file)
+    os.remove(batch_file_path)
+
+
+def upload_file_to_gcs(file_path: pathlib.Path, gcs_bucket: str, gcs_path: str) -> None:
+    """Upload the local ``file_path`` to ``gs://<gcs_bucket>/<gcs_path>``."""
+    storage_client = storage.Client()
+    bucket = storage_client.bucket(gcs_bucket)
+    blob = bucket.blob(gcs_path)
+    blob.upload_from_filename(str(file_path))
+
+
+if __name__ == "__main__":
+    logging.getLogger().setLevel(logging.INFO)
+
+    main(
+        source_url=os.environ["SOURCE_URL"],
+        source_file=pathlib.Path(os.environ["SOURCE_FILE"]).expanduser(),
+        target_file=pathlib.Path(os.environ["TARGET_FILE"]).expanduser(),
+        chunksize=os.environ["CHUNKSIZE"],
+        target_gcs_bucket=os.environ["TARGET_GCS_BUCKET"],
+        target_gcs_path=os.environ["TARGET_GCS_PATH"],
+    )
diff --git a/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/requirements.txt b/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/requirements.txt
new file mode 100644
index 000000000..f36704793
--- /dev/null
+++ b/datasets/san_francisco_311_service_requests/_images/run_csv_transform_kub/requirements.txt
@@ -0,0 +1,3 @@
+requests
+pandas
+google-cloud-storage
diff --git a/datasets/san_francisco_311_service_requests/_terraform/311_service_requests_pipeline.tf b/datasets/san_francisco_311_service_requests/_terraform/311_service_requests_pipeline.tf
new file mode 100644
index 000000000..91e6699a6
---
/dev/null +++ b/datasets/san_francisco_311_service_requests/_terraform/311_service_requests_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +resource "google_bigquery_table" "bqt_311_service_requests" { + project = var.project_id + dataset_id = "san_francisco_311_service_requests" + table_id = "311_service_requests" + + description = "san_francisco_311_service_requestsspc" + + + + + depends_on = [ + google_bigquery_dataset.san_francisco_311_service_requests + ] +} + +output "bigquery_table-311_service_requests-table_id" { + value = google_bigquery_table.bqt_311_service_requests.table_id +} + +output "bigquery_table-311_service_requests-id" { + value = google_bigquery_table.bqt_311_service_requests.id +} diff --git a/datasets/san_francisco_311_service_requests/_terraform/provider.tf b/datasets/san_francisco_311_service_requests/_terraform/provider.tf new file mode 100644 index 000000000..23ab87dcd --- /dev/null +++ b/datasets/san_francisco_311_service_requests/_terraform/provider.tf @@ -0,0 +1,28 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +provider "google" { + project = var.project_id + impersonate_service_account = var.impersonating_acct + region = var.region +} + +data "google_client_openid_userinfo" "me" {} + +output "impersonating-account" { + value = data.google_client_openid_userinfo.me.email +} diff --git a/datasets/san_francisco_311_service_requests/_terraform/san_francisco_311_service_requests_dataset.tf b/datasets/san_francisco_311_service_requests/_terraform/san_francisco_311_service_requests_dataset.tf new file mode 100644 index 000000000..fac313b71 --- /dev/null +++ b/datasets/san_francisco_311_service_requests/_terraform/san_francisco_311_service_requests_dataset.tf @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_dataset" "san_francisco_311_service_requests" { + dataset_id = "san_francisco_311_service_requests" + project = var.project_id + description = "san_francisco_311_service_requests" +} + +output "bigquery_dataset-san_francisco_311_service_requests-dataset_id" { + value = google_bigquery_dataset.san_francisco_311_service_requests.dataset_id +} diff --git a/datasets/san_francisco_311_service_requests/_terraform/variables.tf b/datasets/san_francisco_311_service_requests/_terraform/variables.tf new file mode 100644 index 000000000..c3ec7c506 --- /dev/null +++ b/datasets/san_francisco_311_service_requests/_terraform/variables.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +variable "project_id" {} +variable "bucket_name_prefix" {} +variable "impersonating_acct" {} +variable "region" {} +variable "env" {} + diff --git a/datasets/san_francisco_311_service_requests/dataset.yaml b/datasets/san_francisco_311_service_requests/dataset.yaml new file mode 100644 index 000000000..6008b87f4 --- /dev/null +++ b/datasets/san_francisco_311_service_requests/dataset.yaml @@ -0,0 +1,27 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dataset: + name: san_francisco_311_service_requests + friendly_name: ~ + description: ~ + dataset_sources: ~ + terms_of_use: ~ + + +resources: + + - type: bigquery_dataset + dataset_id: san_francisco_311_service_requests + description: san_francisco_311_service_requests