diff --git a/datasets/google_trends/_terraform/google_trends_dataset.tf b/datasets/google_trends/_terraform/google_trends_dataset.tf
new file mode 100644
index 000000000..709ff4556
--- /dev/null
+++ b/datasets/google_trends/_terraform/google_trends_dataset.tf
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Creates the `google_trends` BigQuery dataset in var.project_id and exports its dataset_id.
+resource "google_bigquery_dataset" "google_trends" {
+  dataset_id  = "google_trends"
+  project     = var.project_id
+  description = "The Google Trends dataset will provide critical signals that individual users and businesses alike can leverage to make better data-driven decisions. This dataset simplifies the manual interaction with the existing Google Trends UI by automating and exposing anonymized, aggregated, and indexed search data in BigQuery."
+}
+
+output "bigquery_dataset-google_trends-dataset_id" {
+  value = google_bigquery_dataset.google_trends.dataset_id
+}
diff --git a/datasets/google_trends/_terraform/provider.tf b/datasets/google_trends/_terraform/provider.tf
new file mode 100644
index 000000000..23ab87dcd
--- /dev/null
+++ b/datasets/google_trends/_terraform/provider.tf
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Google provider impersonating var.impersonating_acct; the output exposes the e-mail read from google_client_openid_userinfo.
+provider "google" {
+  project                     = var.project_id
+  impersonate_service_account = var.impersonating_acct
+  region                      = var.region
+}
+
+data "google_client_openid_userinfo" "me" {}
+
+output "impersonating-account" {
+  value = data.google_client_openid_userinfo.me.email
+}
diff --git a/datasets/google_trends/_terraform/top_terms_pipeline.tf b/datasets/google_trends/_terraform/top_terms_pipeline.tf
new file mode 100644
index 000000000..61029dd55
--- /dev/null
+++ b/datasets/google_trends/_terraform/top_terms_pipeline.tf
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Tables `top_terms` and `top_rising_terms` in the google_trends dataset; no schema is declared in this file.
+resource "google_bigquery_table" "top_terms" {
+  project    = var.project_id
+  dataset_id = "google_trends"
+  table_id   = "top_terms"
+
+  description = "Daily top 25 terms in the United States with score, ranking, time, and designated market area"
+
+  depends_on = [
+    google_bigquery_dataset.google_trends
+  ]
+}
+
+output "bigquery_table-top_terms-table_id" {
+  value = google_bigquery_table.top_terms.table_id
+}
+
+output "bigquery_table-top_terms-id" {
+  value = google_bigquery_table.top_terms.id
+}
+
+resource "google_bigquery_table" "top_rising_terms" {
+  project    = var.project_id
+  dataset_id = "google_trends"
+  table_id   = "top_rising_terms"
+
+  description = "Daily top rising terms in the United States with score, ranking, time, and designated market area"
+
+  depends_on = [
+    google_bigquery_dataset.google_trends
+  ]
+}
+
+output "bigquery_table-top_rising_terms-table_id" {
+  value = google_bigquery_table.top_rising_terms.table_id
+}
+
+output "bigquery_table-top_rising_terms-id" {
+  value = google_bigquery_table.top_rising_terms.id
+}
diff --git a/datasets/google_trends/_terraform/variables.tf b/datasets/google_trends/_terraform/variables.tf
new file mode 100644
index 000000000..c3ec7c506
--- /dev/null
+++ b/datasets/google_trends/_terraform/variables.tf
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Input variables; none declares a type or default, so every value must be supplied by the caller.
+variable "project_id" {}
+variable "bucket_name_prefix" {}
+variable "impersonating_acct" {}
+variable "region" {}
+variable "env" {}
+
diff --git a/datasets/google_trends/dataset.yaml b/datasets/google_trends/dataset.yaml
new file mode 100644
index 000000000..267dcdfd0
--- /dev/null
+++ b/datasets/google_trends/dataset.yaml
@@ -0,0 +1,58 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dataset:
+  # The `dataset` block includes properties for your dataset that will be shown
+  # to users of your data on the Google Cloud website.
+
+  # Must exactly match the name of the folder that contains this dataset.yaml.
+  name: google_trends
+
+  # A friendly, human-readable name of the dataset.
+  friendly_name: ~
+
+  # A short, descriptive summary of the dataset.
+  description: ~
+
+  # A list of sources the dataset is derived from, using the YAML list syntax.
+  dataset_sources: ~
+
+  # A list of terms and conditions that users of the dataset should agree on,
+  # using the YAML list syntax.
+  terms_of_use: ~
+
+
+resources:
+  # A list of Google Cloud resources needed by your dataset. In principle, all
+  # pipelines under a dataset should be able to share these resources.
+  #
+  # The currently supported resources are shown below. Use only the resources
+  # you need, and delete the rest as needed by your pipeline.
+  #
+  # We will keep adding to the list below to support more Google Cloud resources
+  # over time. If a resource you need isn't supported, please file an issue on
+  # the repository.
+
+  - type: bigquery_dataset
+    # Google BigQuery dataset to namespace all tables managed by this folder
+    #
+    # Required Properties:
+    #   dataset_id
+    #
+    # Optional Properties:
+    #   friendly_name (A user-friendly name of the dataset)
+    #   description (A user-friendly description of the dataset)
+    #   location (The geographic location where the dataset should reside)
+    dataset_id: google_trends
+    description: "The Google Trends dataset will provide critical signals that individual users and businesses alike can leverage to make better data-driven decisions. This dataset simplifies the manual interaction with the existing Google Trends UI by automating and exposing anonymized, aggregated, and indexed search data in BigQuery."
diff --git a/datasets/google_trends/top_terms/pipeline.yaml b/datasets/google_trends/top_terms/pipeline.yaml
new file mode 100644
index 000000000..50e0b1dde
--- /dev/null
+++ b/datasets/google_trends/top_terms/pipeline.yaml
@@ -0,0 +1,60 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+resources:
+  - type: bigquery_table
+    table_id: top_terms
+    description: "Daily top 25 terms in the United States with score, ranking, time, and designated market area"
+
+  - type: bigquery_table
+    table_id: top_rising_terms
+    description: "Daily top rising terms in the United States with score, ranking, time, and designated market area"
+
+dag:
+  initialize:
+    dag_id: top_terms
+    default_args:
+      owner: "Google"
+      depends_on_past: False
+      start_date: '2021-06-01'
+    max_active_runs: 1
+    schedule_interval: "@daily"
+    catchup: False
+    default_view: graph
+
+  tasks:
+    - operator: "BigQueryToBigQueryOperator"
+      description: "Task to run a BQ to BQ operation"
+
+      args:
+        task_id: "fetch_and_load_top_n"
+        source_project_dataset_tables: ["{{ var.json.google_trends.top_n.source_project_dataset_table }}"]
+        destination_project_dataset_table: "{{ var.json.google_trends.top_n.destination_project_dataset_table }}"
+        impersonation_chain: "{{ var.json.google_trends.service_account }}"
+        write_disposition: "WRITE_TRUNCATE"
+
+    - operator: "BigQueryToBigQueryOperator"
+      description: "Task to run a BQ to BQ operation"
+
+      args:
+        task_id: "fetch_and_load_top_rising"
+        source_project_dataset_tables: ["{{ var.json.google_trends.top_rising.source_project_dataset_table }}"]
+        destination_project_dataset_table: "{{ var.json.google_trends.top_rising.destination_project_dataset_table }}"
+        impersonation_chain: "{{ var.json.google_trends.service_account }}"
+        write_disposition: "WRITE_TRUNCATE"
+  # Each path names a single task with no `>>` chaining, so the two tasks have no dependency on each other.
+  graph_paths:
+    - "fetch_and_load_top_n"
+    - "fetch_and_load_top_rising"
diff --git a/datasets/google_trends/top_terms/top_terms_dag.py b/datasets/google_trends/top_terms/top_terms_dag.py
new file mode 100644
index 000000000..4e98e112c
--- /dev/null
+++ b/datasets/google_trends/top_terms/top_terms_dag.py
@@ -0,0 +1,58 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Airflow DAG `google_trends.top_terms`: two BigQuery table-copy tasks scheduled @daily.
+from airflow import DAG
+from airflow.contrib.operators import bigquery_to_bigquery
+
+default_args = {
+    "owner": "Google",
+    "depends_on_past": False,
+    "start_date": "2021-06-01",
+}
+
+
+with DAG(
+    dag_id="google_trends.top_terms",
+    default_args=default_args,
+    max_active_runs=1,
+    schedule_interval="@daily",
+    catchup=False,
+    default_view="graph",
+) as dag:
+
+    # Copy the templated top_n source table to its destination; WRITE_TRUNCATE overwrites existing data.
+    fetch_and_load_top_n = bigquery_to_bigquery.BigQueryToBigQueryOperator(
+        task_id="fetch_and_load_top_n",
+        source_project_dataset_tables=[
+            "{{ var.json.google_trends.top_n.source_project_dataset_table }}"
+        ],
+        destination_project_dataset_table="{{ var.json.google_trends.top_n.destination_project_dataset_table }}",
+        impersonation_chain="{{ var.json.google_trends.service_account }}",
+        write_disposition="WRITE_TRUNCATE",
+    )
+
+    # Copy the templated top_rising source table to its destination; WRITE_TRUNCATE overwrites existing data.
+    fetch_and_load_top_rising = bigquery_to_bigquery.BigQueryToBigQueryOperator(
+        task_id="fetch_and_load_top_rising",
+        source_project_dataset_tables=[
+            "{{ var.json.google_trends.top_rising.source_project_dataset_table }}"
+        ],
+        destination_project_dataset_table="{{ var.json.google_trends.top_rising.destination_project_dataset_table }}",
+        impersonation_chain="{{ var.json.google_trends.service_account }}",
+        write_disposition="WRITE_TRUNCATE",
+    )
+    # Bare task references with no `>>`/`<<`: no ordering is declared between the two tasks.
+    fetch_and_load_top_n
+    fetch_and_load_top_rising