New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Onboard BLS - CPSAAT 2020 dataset #105
Changes from 3 commits
1cf0db2
fe2a301
52914e2
f309da6
3c83177
0ee4376
e0b82a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/** | ||
* Copyright 2021 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
# BigQuery dataset "bls", created in the project given by var.project_id.
resource "google_bigquery_dataset" "bls" {
  project    = var.project_id
  dataset_id = "bls"

  # Dataset description, kept as a single escaped string (\n = newline,
  # \u00a0 = non-breaking space, \u0026 = ampersand).
  description = "Overview: This dataset includes economic statistics on inflation, prices, unemployment, and pay \u0026 benefits provided by the Bureau of Labor Statistics (BLS).\n\nUpdate frequency: Monthly\n\nDataset source: U.S. Bureau of Labor Statistics\n\nTerms of use: This dataset is publicly available for anyone to use under the following terms provided by the Dataset Source -\u00a0http://www.data.gov/privacy-policy#data_policy\u00a0- and is provided \"AS IS\" without any warranty, express or implied, from Google. Google disclaims all liability for any damages, direct or indirect, resulting from the use of the dataset.\n\nSee the GCP Marketplace listing for more details and sample queries: https://console.cloud.google.com/marketplace/details/bls-public-data/bureau-of-labor-statistics"
}

# Exposes the ID of the dataset defined above.
output "bigquery_dataset-bls-dataset_id" {
  value = google_bigquery_dataset.bls.dataset_id
}
|
||
# Cloud Storage bucket whose name is var.bucket_name_prefix followed by "-bls".
resource "google_storage_bucket" "bls" {
  name = "${var.bucket_name_prefix}-bls"

  # Set explicitly: google provider 3.x falls back to "US" when this is
  # omitted, but provider 4.0+ makes the argument required. "US" keeps the
  # current behaviour and lets the configuration survive a provider upgrade.
  location = "US"

  # Allows `terraform destroy` to delete the bucket even when it still
  # contains objects.
  force_destroy = true
}

# Exposes the name of the bucket defined above.
output "storage_bucket-bls-name" {
  value = google_storage_bucket.bls.name
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/** | ||
* Copyright 2021 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
# BigQuery table "cpsaat18" inside the "bls" dataset. No schema is declared
# on the resource itself.
resource "google_bigquery_table" "cpsaat18" {
  project = var.project_id

  # Reference the dataset resource instead of repeating the "bls" literal:
  # the ID cannot drift from the dataset definition, and Terraform derives
  # the dataset-before-table ordering from the reference, so an explicit
  # depends_on is no longer needed.
  dataset_id = google_bigquery_dataset.bls.dataset_id
  table_id   = "cpsaat18"

  description = "Current population survey 18: Employed persons by detailed industry, sex, race, and Hispanic or Latino ethnicity"
}

# Short table ID ("cpsaat18").
output "bigquery_table-cpsaat18-table_id" {
  value = google_bigquery_table.cpsaat18.table_id
}

# Provider-assigned resource identifier of the table.
output "bigquery_table-cpsaat18-id" {
  value = google_bigquery_table.cpsaat18.id
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/** | ||
* Copyright 2021 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
# Google provider configuration: default project and region come from input
# variables, and API calls are made by impersonating the service account
# named in var.impersonating_acct.
provider "google" {
  region                      = var.region
  project                     = var.project_id
  impersonate_service_account = var.impersonating_acct
}

# Looks up the identity the provider is currently authenticated as.
data "google_client_openid_userinfo" "me" {}

# Email of that identity, surfaced as an output.
output "impersonating-account" {
  value = data.google_client_openid_userinfo.me.email
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/** | ||
* Copyright 2021 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
# Project ID passed to the provider and to the BigQuery dataset/table.
variable "project_id" {
  type = string
}

# Prefix of the storage bucket name; "-bls" is appended to it.
variable "bucket_name_prefix" {
  type = string
}

# Service account the Google provider impersonates.
variable "impersonating_acct" {
  type = string
}

# Default region for the Google provider.
variable "region" {
  type = string
}

# NOTE(review): not referenced by any configuration visible alongside this
# file; presumably an environment label — confirm before adding a type.
variable "env" {}
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Copyright 2021 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
from airflow import DAG | ||
from airflow.contrib.operators import gcs_to_bq | ||
|
||
# Arguments applied to every task in the DAG. start_date is given as a
# date string.
default_args = {
    "owner": "Google",
    "depends_on_past": False,
    "start_date": "2021-06-23",
}


# One-shot DAG ("@once", no catch-up) containing a single GCS -> BigQuery load.
with DAG(
    dag_id="bls.cpsaat18",
    default_args=default_args,
    max_active_runs=1,
    schedule_interval="@once",
    catchup=False,
    default_view="graph",
) as dag:

    # Task to load the CPSAAT18 data to the BigQuery table
    load_csv_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
        task_id="load_csv_to_bq",
        # Source bucket is resolved at run time from the "bls" Airflow variable.
        bucket="{{ var.json.bls.source_bucket }}",
        # Fixed object for the 2020 data; this path has to change if the DAG
        # is ever moved from "@once" to a recurring schedule.
        source_objects=["cpsaat18/2020.csv"],
        source_format="CSV",
        destination_project_dataset_table="bls.cpsaat18",
        # Skip the first row of the file (presumably the CSV header).
        skip_leading_rows=1,
        # Each load replaces the table contents instead of appending.
        write_disposition="WRITE_TRUNCATE",
        # BigQuery schema fields express nullability through "mode"
        # ("REQUIRED" / "NULLABLE"). A "nullable" key is not part of the
        # schema format, so it cannot make a column required.
        schema_fields=[
            {"name": "year", "type": "integer", "mode": "REQUIRED"},
            {"name": "sector", "type": "string", "mode": "REQUIRED"},
            {"name": "subsector", "type": "string", "mode": "NULLABLE"},
            {"name": "industry_group", "type": "string", "mode": "NULLABLE"},
            {"name": "industry", "type": "string", "mode": "NULLABLE"},
            {
                "name": "total_employed_in_thousands",
                "type": "integer",
                "mode": "NULLABLE",
            },
            {"name": "percent_women", "type": "float", "mode": "NULLABLE"},
            {"name": "percent_white", "type": "float", "mode": "NULLABLE"},
            {
                "name": "percent_black_or_african_american",
                "type": "float",
                "mode": "NULLABLE",
            },
            {"name": "percent_asian", "type": "float", "mode": "NULLABLE"},
            {"name": "percent_hispanic_or_latino", "type": "float", "mode": "NULLABLE"},
        ],
    )

    # Graph path: the DAG consists of this single task.
    load_csv_to_bq
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Copyright 2021 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
--- | ||
resources: | ||
- type: bigquery_table | ||
table_id: cpsaat18 | ||
description: "Current population survey 18: Employed persons by detailed industry, sex, race, and Hispanic or Latino ethnicity" | ||
|
||
dag: | ||
initialize: | ||
dag_id: cpsaat18 | ||
default_args: | ||
owner: "Google" | ||
depends_on_past: False | ||
start_date: '2021-06-23' | ||
max_active_runs: 1 | ||
schedule_interval: "@once" | ||
catchup: False | ||
default_view: graph | ||
|
||
tasks: | ||
- operator: "GoogleCloudStorageToBigQueryOperator" | ||
description: "Task to load the CPSAAT18 data to the BigQuery table" | ||
args: | ||
task_id: "load_csv_to_bq" | ||
bucket: "{{ var.json.bls.source_bucket }}" | ||
source_objects: ["cpsaat18/2020.csv"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The CSV is for 2020, so if we switch to a yearly schedule this hard-coded path doesn't work. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done! 3c83177 |
||
source_format: "CSV" | ||
destination_project_dataset_table: "bls.cpsaat18" | ||
skip_leading_rows: 1 | ||
write_disposition: "WRITE_TRUNCATE" | ||
# BigQuery schema fields express nullability through "mode"
# (REQUIRED / NULLABLE). A "nullable" key is not part of the schema format,
# so it cannot make a column required.
schema_fields:
  - name: "year"
    type: "integer"
    mode: "REQUIRED"
  - name: "sector"
    type: "string"
    mode: "REQUIRED"
  - name: "subsector"
    type: "string"
    mode: "NULLABLE"
  - name: "industry_group"
    type: "string"
    mode: "NULLABLE"
  - name: "industry"
    type: "string"
    mode: "NULLABLE"
  - name: "total_employed_in_thousands"
    type: "integer"
    mode: "NULLABLE"
  # (review thread on the next entry: leahecole marked this conversation as resolved.)
  - name: "percent_women"
    type: "float"
    mode: "NULLABLE"
  - name: "percent_white"
    type: "float"
    mode: "NULLABLE"
  - name: "percent_black_or_african_american"
    type: "float"
    mode: "NULLABLE"
  - name: "percent_asian"
    type: "float"
    mode: "NULLABLE"
  - name: "percent_hispanic_or_latino"
    type: "float"
    mode: "NULLABLE"
|
||
graph_paths: | ||
- "load_csv_to_bq" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Copyright 2021 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
dataset: | ||
name: bls | ||
friendly_name: ~ | ||
description: ~ | ||
dataset_sources: ~ | ||
terms_of_use: ~ | ||
|
||
|
||
resources: | ||
- type: bigquery_dataset | ||
dataset_id: bls | ||
description: "Overview: This dataset includes economic statistics on inflation, prices, unemployment, and pay & benefits provided by the Bureau of Labor Statistics (BLS).\n\nUpdate frequency: Monthly\n\nDataset source: U.S. Bureau of Labor Statistics\n\nTerms of use: This dataset is publicly available for anyone to use under the following terms provided by the Dataset Source - http://www.data.gov/privacy-policy#data_policy - and is provided \"AS IS\" without any warranty, express or implied, from Google. Google disclaims all liability for any damages, direct or indirect, resulting from the use of the dataset.\n\nSee the GCP Marketplace listing for more details and sample queries: https://console.cloud.google.com/marketplace/details/bls-public-data/bureau-of-labor-statistics" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now that #103 is in, we can get rid of the newlines :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here you go! It looks nicer now in the YAML config: e0b82a4 |
||
|
||
- type: storage_bucket | ||
name: bls |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should be yearly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! f309da6