diff --git a/datasets/irs_990/_images/run_csv_transform_kub/Dockerfile b/datasets/irs_990/_images/run_csv_transform_kub/Dockerfile new file mode 100644 index 000000000..85af90570 --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/Dockerfile @@ -0,0 +1,38 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The base image for this build +# FROM gcr.io/google.com/cloudsdktool/cloud-sdk:slim +FROM python:3.8 + +# Allow statements and log messages to appear in Cloud logs +ENV PYTHONUNBUFFERED True + +# Copy the requirements file into the image +COPY requirements.txt ./ + +# Install the packages specified in the requirements file +RUN python3 -m pip install --no-cache-dir -r requirements.txt + +# The WORKDIR instruction sets the working directory for any RUN, CMD, +# ENTRYPOINT, COPY and ADD instructions that follow it in the Dockerfile. +# If the WORKDIR doesn’t exist, it will be created even if it’s not used in +# any subsequent Dockerfile instruction +WORKDIR /custom + +# Copy the specific data processing script/s in the image under /custom/* +COPY ./csv_transform.py . 
+ +# Command to run the data processing script when the container is run +CMD ["python3", "csv_transform.py"] diff --git a/datasets/irs_990/_images/run_csv_transform_kub/Pipfile b/datasets/irs_990/_images/run_csv_transform_kub/Pipfile new file mode 100644 index 000000000..37f9797d3 --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +requests = "*" +vaex = "*" + +[dev-packages] + +[requires] +python_version = "3.9" diff --git a/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py b/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py new file mode 100644 index 000000000..e9af82792 --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py @@ -0,0 +1,156 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Download an IRS 990 extract, normalise it with pandas and upload it to GCS.

The script is configured entirely through environment variables (see the
``__main__`` guard at the bottom of the file).
"""

import datetime
import json
import logging
import math
import os
import pathlib
import re
import typing
from urllib.parse import urlparse

import pandas as pd

# Columns of the "pf" extracts in which the literal "N" is replaced by 0.
# NOTE(review): presumably "N" is a "not applicable" marker in the IRS
# source files -- confirm against the IRS record layout.
_PF_COLUMNS_WITH_N_MARKER = ("invstexcisetx", "crelamt", "dvdndsinte", "intrstrvnue")

# Seconds to wait for the server to connect / send data before giving up.
_DOWNLOAD_TIMEOUT_SECONDS = 60


def main(
    source_url: str,
    source_file: pathlib.Path,
    target_file: pathlib.Path,
    target_gcs_bucket: str,
    target_gcs_path: str,
    headers: typing.List[str],
    rename_mappings: dict,
    pipeline_name: str,
) -> None:
    """Run the download -> transform -> save -> upload pipeline.

    Args:
        source_url: URL of the source extract (plain text or zip archive).
        source_file: Local path the download is written to.
        target_file: Local path of the transformed CSV.
        target_gcs_bucket: Name of the destination GCS bucket.
        target_gcs_path: Object path inside the destination bucket.
        headers: Output columns, in the order they must appear in the CSV.
        rename_mappings: Mapping of source column name -> output column name.
        pipeline_name: Name used in log messages; a name containing "pf"
            selects the "pf" specific clean-up of the numeric columns.

    Raises:
        RuntimeError: If the source file cannot be downloaded.
        Exception: Any error raised while writing the output file is logged
            and re-raised so that a missing/stale file is never uploaded.
    """
    logging.info(
        f"irs 990 {pipeline_name} process started at "
        + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    )

    logging.info("creating 'files' folder")
    pathlib.Path("./files").mkdir(parents=True, exist_ok=True)
    # Also make sure the folders of the configured paths exist, in case they
    # do not live under ./files.
    for local_path in (source_file, target_file):
        pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)

    logging.info(f"Downloading file from {source_url}... ")
    download_file(source_url, source_file)

    logging.info(f"Opening file {source_file} ... ")
    source_name = os.path.basename(urlparse(source_url).path)

    # The extracts are whitespace-delimited; zipped sources are read directly
    # from the archive by pandas.
    read_options = {"encoding": "utf-8", "sep": r"\s+"}
    if re.search("zip", source_name):
        read_options["compression"] = "zip"
    df = pd.read_csv(str(source_file), **read_options)

    logging.info(f"Transforming {source_file} ...")

    logging.info(f"Transform: Rename columns {source_file} ...")
    rename_headers(df, rename_mappings)

    logging.info(f"Transform: filtering null values {source_file} ...")
    filter_null_rows(df)

    logging.info(f"Transform: converting to integer {source_file} ...")

    if re.search("pf", pipeline_name):
        for column in _PF_COLUMNS_WITH_N_MARKER:
            df[column] = df[column].replace("N", 0)
    else:
        df["totsupp509"] = df["totsupp509"].apply(convert_to_int)

    logging.info(f"Transform: Reordering headers for {source_name} ...")

    df = df[headers]

    logging.info(f"Saving to output file {target_file} ...")
    try:
        save_to_new_file(df, file_path=str(target_file))
    except Exception:
        # Do not continue to the upload step with a missing or partial file.
        logging.exception(f"Error saving output file {target_file}.")
        raise

    logging.info(
        f"Uploading output file to.. gs://{target_gcs_bucket}/{target_gcs_path}"
    )
    upload_file_to_gcs(target_file, target_gcs_bucket, target_gcs_path)

    logging.info(
        f"irs 990 {pipeline_name} process completed at "
        + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    )


def rename_headers(df: pd.DataFrame, rename_mappings: dict) -> None:
    """Rename the columns of *df* in place according to *rename_mappings*."""
    df.rename(columns=rename_mappings, inplace=True)


def filter_null_rows(df: pd.DataFrame) -> None:
    """Drop, in place, every row whose ``ein`` value is missing or blank.

    Rows are removed by index label, so *df* is expected to have a unique
    index (as produced by ``pd.read_csv``).
    """
    ein = df["ein"]
    # read_csv turns empty fields into NaN, so both representations of
    # "no value" have to be checked.
    missing = ein.isna() | (ein.astype(str).str.strip() == "")
    df.drop(index=df.index[missing.to_numpy()], inplace=True)


def save_to_new_file(df: pd.DataFrame, file_path: pathlib.Path) -> None:
    """Write *df* to *file_path* as CSV without the index column."""
    df.to_csv(file_path, index=False)


def download_file(source_url: str, source_file: pathlib.Path) -> None:
    """Stream *source_url* to *source_file*.

    Raises:
        RuntimeError: If the server answers with anything but HTTP 200.
    """
    # Imported lazily so the pure transform helpers in this module can be
    # imported (and unit-tested) without the HTTP client installed.
    import requests

    logging.info(f"Downloading {source_url} into {source_file}")
    with requests.get(
        source_url, stream=True, timeout=_DOWNLOAD_TIMEOUT_SECONDS
    ) as response:
        if response.status_code != 200:
            logging.error(f"Couldn't download {source_url}: {response.text}")
            raise RuntimeError(
                f"Download of {source_url} failed with HTTP {response.status_code}"
            )
        with open(source_file, "wb") as f:
            # 1 MiB chunks instead of the 128-byte default of iterating the
            # response object directly.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)


def convert_to_int(input: typing.Union[str, float, int, None]) -> str:
    """Return *input* rounded to the nearest integer, rendered as a string.

    Missing values (``None``, NaN, empty/blank strings) yield ``""``.
    Numeric strings are accepted; a non-numeric string raises ``ValueError``.
    The parameter keeps its original name for backward compatibility even
    though it shadows the ``input`` builtin.
    """
    value = input
    if value is None:
        return ""
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return ""
        value = float(value)
    if math.isnan(value):
        return ""
    return str(int(round(value, 0)))


def upload_file_to_gcs(file_path: pathlib.Path, gcs_bucket: str, gcs_path: str) -> None:
    """Upload the local file *file_path* to ``gs://gcs_bucket/gcs_path``."""
    # Imported lazily for the same reason as ``requests`` in download_file.
    from google.cloud import storage

    storage_client = storage.Client()
    bucket = storage_client.bucket(gcs_bucket)
    blob = bucket.blob(gcs_path)
    blob.upload_from_filename(str(file_path))


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)

    main(
        source_url=os.environ["SOURCE_URL"],
        source_file=pathlib.Path(os.environ["SOURCE_FILE"]).expanduser(),
        target_file=pathlib.Path(os.environ["TARGET_FILE"]).expanduser(),
        target_gcs_bucket=os.environ["TARGET_GCS_BUCKET"],
        target_gcs_path=os.environ["TARGET_GCS_PATH"],
        headers=json.loads(os.environ["CSV_HEADERS"]),
        rename_mappings=json.loads(os.environ["RENAME_MAPPINGS"]),
        pipeline_name=os.environ["PIPELINE_NAME"],
    )
# BigQuery table for the "IRS 990 2014" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_2014" {
  description = "IRS 990 2014 dataset"

  table_id   = "irs_990_2014"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_2014-table_id" {
  value = google_bigquery_table.irs_990_2014.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_2014-id" {
  value = google_bigquery_table.irs_990_2014.id
}
# BigQuery table for the "IRS 990 2015" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_2015" {
  description = "IRS 990 2015 dataset"

  table_id   = "irs_990_2015"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_2015-table_id" {
  value = google_bigquery_table.irs_990_2015.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_2015-id" {
  value = google_bigquery_table.irs_990_2015.id
}
# BigQuery table for the "IRS 990 2016" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_2016" {
  description = "IRS 990 2016 dataset"

  table_id   = "irs_990_2016"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_2016-table_id" {
  value = google_bigquery_table.irs_990_2016.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_2016-id" {
  value = google_bigquery_table.irs_990_2016.id
}
# BigQuery table for the "IRS 990 2017" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_2017" {
  description = "IRS 990 2017 dataset"

  table_id   = "irs_990_2017"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_2017-table_id" {
  value = google_bigquery_table.irs_990_2017.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_2017-id" {
  value = google_bigquery_table.irs_990_2017.id
}

# BigQuery dataset that the irs_990 tables declare as their dependency.
resource "google_bigquery_dataset" "irs_990" {
  description = "irs_990"
  project     = var.project_id
  dataset_id  = "irs_990"
}

# Exposes the dataset_id attribute of the dataset declared above.
output "bigquery_dataset-irs_990-dataset_id" {
  value = google_bigquery_dataset.irs_990.dataset_id
}
# BigQuery table for the "IRS 990 EZ 2014" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_ez_2014" {
  description = "IRS 990 EZ 2014 dataset"

  table_id   = "irs_990_ez_2014"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2014-table_id" {
  value = google_bigquery_table.irs_990_ez_2014.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2014-id" {
  value = google_bigquery_table.irs_990_ez_2014.id
}
# BigQuery table for the "IRS 990 EZ 2015" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_ez_2015" {
  description = "IRS 990 EZ 2015 dataset"

  table_id   = "irs_990_ez_2015"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2015-table_id" {
  value = google_bigquery_table.irs_990_ez_2015.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2015-id" {
  value = google_bigquery_table.irs_990_ez_2015.id
}
# BigQuery table for the "IRS 990 EZ 2016" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_ez_2016" {
  description = "IRS 990 EZ 2016 dataset"

  table_id   = "irs_990_ez_2016"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2016-table_id" {
  value = google_bigquery_table.irs_990_ez_2016.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2016-id" {
  value = google_bigquery_table.irs_990_ez_2016.id
}
# BigQuery table for the "IRS 990 EZ 2017" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_ez_2017" {
  description = "IRS 990 EZ 2017 dataset"

  table_id   = "irs_990_ez_2017"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2017-table_id" {
  value = google_bigquery_table.irs_990_ez_2017.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_ez_2017-id" {
  value = google_bigquery_table.irs_990_ez_2017.id
}
# BigQuery table for the "IRS 990 PF 2014" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_pf_2014" {
  description = "IRS 990 PF 2014 dataset"

  table_id   = "irs_990_pf_2014"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_pf_2014-table_id" {
  value = google_bigquery_table.irs_990_pf_2014.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_pf_2014-id" {
  value = google_bigquery_table.irs_990_pf_2014.id
}
# BigQuery table for the "IRS 990 PF 2015" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_pf_2015" {
  description = "IRS 990 PF 2015 dataset"

  table_id   = "irs_990_pf_2015"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_pf_2015-table_id" {
  value = google_bigquery_table.irs_990_pf_2015.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_pf_2015-id" {
  value = google_bigquery_table.irs_990_pf_2015.id
}
# BigQuery table for the "IRS 990 PF 2016" data. It lives in the irs_990
# dataset, which therefore has to be created first.
resource "google_bigquery_table" "irs_990_pf_2016" {
  description = "IRS 990 PF 2016 dataset"

  table_id   = "irs_990_pf_2016"
  dataset_id = "irs_990"
  project    = var.project_id

  depends_on = [google_bigquery_dataset.irs_990]
}

# Exposes the table_id attribute of the table declared above.
output "bigquery_table-irs_990_pf_2016-table_id" {
  value = google_bigquery_table.irs_990_pf_2016.table_id
}

# Exposes the id attribute of the table declared above.
output "bigquery_table-irs_990_pf_2016-id" {
  value = google_bigquery_table.irs_990_pf_2016.id
}

# Google provider configuration; project, region and the service account to
# impersonate are all supplied through input variables.
provider "google" {
  region                      = var.region
  project                     = var.project_id
  impersonate_service_account = var.impersonating_acct
}

# Looks up the identity the provider is authenticated as.
data "google_client_openid_userinfo" "me" {}

# Exposes the e-mail address reported by the lookup above.
output "impersonating-account" {
  value = data.google_client_openid_userinfo.me.email
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +variable "project_id" {} +variable "bucket_name_prefix" {} +variable "impersonating_acct" {} +variable "region" {} +variable "env" {} + diff --git a/datasets/irs_990/dataset.yaml b/datasets/irs_990/dataset.yaml new file mode 100644 index 000000000..89fc44f5d --- /dev/null +++ b/datasets/irs_990/dataset.yaml @@ -0,0 +1,58 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dataset: + # The `dataset` block includes properties for your dataset that will be shown + # to users of your data on the Google Cloud website. + + # Must be exactly the same name as the folder name your dataset.yaml is in. + name: irs_990 + + # A friendly, human-readable name of the dataset + friendly_name: irs_990 + + # A short, descriptive summary of the dataset. + description: IRS 990 dataset + + # A list of sources the dataset is derived from, using the YAML list syntax. + dataset_sources: ~ + + # A list of terms and conditions that users of the dataset should agree on, + # using the YAML list syntax. 
+ terms_of_use: ~ + + +resources: + # A list of Google Cloud resources needed by your dataset. In principle, all + # pipelines under a dataset should be able to share these resources. + # + # The currently supported resources are shown below. Use only the resources + # you need, and delete the rest as needed by your pipeline. + # + # We will keep adding to the list below to support more Google Cloud resources + # over time. If a resource you need isn't supported, please file an issue on + # the repository. + + - type: bigquery_dataset + # Google BigQuery dataset to namespace all tables managed by this folder + # + # Required Properties: + # dataset_id + # + # Optional Properties: + # friendly_name (A user-friendly name of the dataset) + # description (A user-friendly description of the dataset) + # location (The geographic location where the dataset should reside) + dataset_id: irs_990 + description: irs_990 diff --git a/datasets/irs_990/irs_990_2014/irs_990_2014_dag.py b/datasets/irs_990/irs_990_2014/irs_990_2014_dag.py new file mode 100644 index 000000000..b9fbb59fb --- /dev/null +++ b/datasets/irs_990/irs_990_2014/irs_990_2014_dag.py @@ -0,0 +1,314 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2014", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2014", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990.zip", + "SOURCE_FILE": "files/data.zip", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2014/data_output.csv", + "PIPELINE_NAME": "irs_990_2015", + "CSV_HEADERS": 
'["ein","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur",
"cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincl
s511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2014/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2014", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", "mode": "nullable"}, + {"name": "rptlndbldgeqptcd", "type": 
"string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": "string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", 
"type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", "type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", 
"type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", 
"mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", "type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", 
"type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", "mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", 
"mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", 
"mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_transform_csv >> load_irs_990_to_bq diff --git a/datasets/irs_990/irs_990_2014/pipeline.yaml 
b/datasets/irs_990/irs_990_2014/pipeline.yaml new file mode 100644 index 000000000..38b6b8bb7 --- /dev/null +++ b/datasets/irs_990/irs_990_2014/pipeline.yaml @@ -0,0 +1,846 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2014 + + # Description of the table + description: "IRS 990 2014 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2014 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" # runs once a day at midnight (Airflow "@daily" preset; UTC by default) + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2014" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/14eofinextract990.zip" + SOURCE_FILE: "files/data.zip" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2014/data_output.csv" + PIPELINE_NAME: "irs_990_2014" + CSV_HEADERS: >- + ["ein","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd"
,"f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grn
tspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2014/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2014" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name: "ein" + type: "string" + mode: "required" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + type: "string" + mode: "nullable" + - name: 
"sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: "frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + mode: "nullable" + - name: "reltdorgcd" + 
type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: "nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: "nullable" + - name: "grsincother" + type: 
"integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: "nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + type: "integer" + mode: "nullable" + - name: 
"rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: "integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + mode: "nullable" + - name: "benifitsmembrs" + 
type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: "integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" + mode: "nullable" + - name: "totfuncexpns" + 
type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "capitalstktrstend" + type: "integer" + mode: "nullable" 
+ - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" + mode: 
"nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" + + graph_paths: + - "irs_990_transform_csv >> load_irs_990_to_bq" diff --git a/datasets/irs_990/irs_990_2015/irs_990_2015_dag.py b/datasets/irs_990/irs_990_2015/irs_990_2015_dag.py new file mode 100644 index 000000000..84abe3e3b --- /dev/null +++ b/datasets/irs_990/irs_990_2015/irs_990_2015_dag.py @@ -0,0 +1,315 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2015", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2015", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextract990.dat.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2015/data_output.csv", + "PIPELINE_NAME": "irs_990_2015", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasis
ecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unrel
txincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2015/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2015", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "elf", "type": "string", "mode": "nullable"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", "mode": 
"nullable"}, + {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": 
"string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", 
"type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", 
"mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", 
"type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", "type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", 
"mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", "mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", 
"mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_transform_csv >> load_irs_990_to_bq diff --git 
a/datasets/irs_990/irs_990_2015/pipeline.yaml b/datasets/irs_990/irs_990_2015/pipeline.yaml new file mode 100644 index 000000000..d12a83ccc --- /dev/null +++ b/datasets/irs_990/irs_990_2015/pipeline.yaml @@ -0,0 +1,847 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2015 + + # Description of the table + description: "IRS 990 2015 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2015 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" # runs once a day at midnight (UTC unless the DAG timezone is overridden) + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2015" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/15eofinextract990.dat.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2015/data_output.csv" + PIPELINE_NAME: "irs_990_2015" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","fil
edf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayabl
eend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + # Set resource requests for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_to_bq" + + # The GCS bucket where the CSV file is located. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2015/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2015" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + schema_fields: + - name: "ein" + type: "string" + mode: "required" + - name: "elf" + type: "string" + mode: "nullable" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + 
type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: "frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + 
mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: "nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: 
"nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: "nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + 
type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: "integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + 
mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: "integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" 
+ mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: 
"capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: 
"nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" + + graph_paths: + - "irs_990_transform_csv >> load_irs_990_to_bq" diff --git a/datasets/irs_990/irs_990_2016/irs_990_2016_dag.py b/datasets/irs_990/irs_990_2016/irs_990_2016_dag.py new file mode 100644 index 000000000..de26999dd --- /dev/null +++ b/datasets/irs_990/irs_990_2016/irs_990_2016_dag.py @@ -0,0 +1,315 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2016", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_2016_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_2016_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2016", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextract990.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2016/data_output.csv", + "PIPELINE_NAME": "irs_990_2016", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasis
ecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unrel
txincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_2016_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_2016_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2016/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2016", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "elf", "type": "string", "mode": "nullable"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", 
"mode": "nullable"}, + {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": 
"string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", 
"type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", 
"mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", 
"type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", "type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", 
"mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", "mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", 
"mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_2016_transform_csv >> load_irs_990_2016_to_bq diff --git 
a/datasets/irs_990/irs_990_2016/pipeline.yaml b/datasets/irs_990/irs_990_2016/pipeline.yaml new file mode 100644 index 000000000..04a24c38f --- /dev/null +++ b/datasets/irs_990/irs_990_2016/pipeline.yaml @@ -0,0 +1,848 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2016 + + # Description of the table + description: "IRS 990 2016 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2016 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_2016_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2016" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. 
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to run. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/16eofinextract990.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2016/data_output.csv" + PIPELINE_NAME: "irs_990_2016" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8
282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend
","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_2016_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2016/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2016" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + schema_fields: + - name: "ein" + type: "string" + mode: "required" + - name: "elf" + type: "string" + mode: "nullable" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + 
type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: "frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + 
mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: "nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: 
"nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: "nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + 
type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: "integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + 
mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: "integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" 
+ mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: 
"capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: 
"nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" + + graph_paths: + - "irs_990_2016_transform_csv >> load_irs_990_2016_to_bq" diff --git a/datasets/irs_990/irs_990_2017/irs_990_2017_dag.py b/datasets/irs_990/irs_990_2017/irs_990_2017_dag.py new file mode 100644 index 000000000..22f04f976 --- /dev/null +++ b/datasets/irs_990/irs_990_2017/irs_990_2017_dag.py @@ -0,0 +1,315 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2017", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_2017_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_2017_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2017", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/17eofinextract990.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2017/data_output.csv", + "PIPELINE_NAME": "irs_990_2017", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasis
ecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unrel
txincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_2017_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_2017_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2017/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2017", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "elf", "type": "string", "mode": "nullable"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", 
"mode": "nullable"}, + {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": 
"string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", 
"type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", 
"mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", 
"type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", "type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", 
"mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", "mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", 
"mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_2017_transform_csv >> load_irs_990_2017_to_bq diff --git 
a/datasets/irs_990/irs_990_2017/pipeline.yaml b/datasets/irs_990/irs_990_2017/pipeline.yaml new file mode 100644 index 000000000..d19f5d0ab --- /dev/null +++ b/datasets/irs_990/irs_990_2017/pipeline.yaml @@ -0,0 +1,849 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2017 + + # Description of the table + description: "IRS 990 2017 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2017 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_2017_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2017" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. 
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to run. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/17eofinextract990.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2017/data_output.csv" + PIPELINE_NAME: "irs_990_2017" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8
282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend
","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_2017_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2017/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2017" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name: "ein" + type: "string" + mode: "required" + - name: "elf" + type: "string" + mode: "nullable" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" 
+ type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: "frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + 
mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: "nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: 
"nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: "nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + 
type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: "integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + 
mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: "integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" 
+ mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: 
"capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: 
"nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" + + graph_paths: + - "irs_990_2017_transform_csv >> load_irs_990_2017_to_bq" diff --git a/datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py b/datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py new file mode 100644 index 000000000..140fcf2dc --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py @@ -0,0 +1,495 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2014", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2014_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2014_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2014", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990ez.zip", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2014/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2014", + "CSV_HEADERS": 
'["ein","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2014_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2014_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2014/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2014", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + "type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { 
+ "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + 
"description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": "srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal 
(509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2014_transform_csv >> load_irs_990_ez_2014_to_bq diff --git a/datasets/irs_990/irs_990_ez_2014/pipeline.yaml b/datasets/irs_990/irs_990_ez_2014/pipeline.yaml new file mode 100644 index 000000000..f12770df9 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2014/pipeline.yaml @@ -0,0 +1,397 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2014 + + # Description of the table + description: "IRS 990 EZ 2014 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2014 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2014_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2014" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. 
Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/14eofinextract990ez.zip" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2014/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2014" + CSV_HEADERS: >- + ["ein","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": 
"grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": 
"netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2014_to_bq" + + # The GCS bucket where the CSV file is located in. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2014/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2014" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "tax_pd" + type: "integer" + description: "Tax period" + mode: "nullable" + - name: "subseccd" + type: "integer" + description: "Subsection code" + mode: "nullable" + - name: "totcntrbs" + type: "integer" + description: "Contributions gifts grants etc received" + mode: "nullable" + - name: "prgmservrev" + type: "integer" + description: "Program service revenue" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" + description: "Membership dues and assessments" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" + description: "Investment income" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" + description: "Gross amount from sale of assets" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" + description: "Cost or other basis and sales expenses" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" + description: "Gain or (loss) from sale of assets" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + description: "Gross income from gaming" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" + description: "Special events gross revenue" + mode: "nullable" + - name: "direxpns" + type: "integer" + description: "Special events direct expenses" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + description: "Special events net income (or loss)" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" + description: "Gross sales of inventory" + mode: "nullable" + - name: "costgoodsold" + type: "integer" + description: "Less: cost of goods sold" + mode: "nullable" + - name: "grsprft" + type: "integer" + description: "Gross profit (or loss) from sales of inventory" + mode: "nullable" + - name: "othrevnue" + type: "integer" + description: "Other revenue - total" + mode: "nullable" + - name: "totrevnue" + type: "integer" + 
description: "Total revenue" + mode: "nullable" + - name: "totexpns" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "totexcessyr" + type: "integer" + description: "Excess or deficit" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" + description: "Other changes in net assets" + mode: "nullable" + - name: "networthend" + type: "integer" + description: "Net assets EOY" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets e-o-y" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities e-o-y" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" + description: "Total net worth e-o-y" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" + description: "Activity not previously reported?" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" + description: "Significant changes to governing docs?" + mode: "nullable" + - name: "unrelbusincd" + type: "string" + description: "UBI over $1000?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Organization Filed 990T" + mode: "nullable" + - name: "contractioncd" + type: "string" + description: "Liquidation dissolution termination or contraction" + mode: "nullable" + - name: "politicalexpend" + type: "integer" + description: "Direct or indirect political expenditures" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" + description: "File Form 1120-POL?" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" + description: "Loans to/from officers directors or trustees?" 
+ mode: "nullable" + - name: "loanstoofficers" + type: "integer" + description: "Amount of loans to/from officers" + mode: "nullable" + - name: "initiationfee" + type: "integer" + description: "Initiation fees and capital contributions" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" + description: "Gross receipts for public use of club facilities" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" + description: "Section 4958 excess benefit transactions?" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + description: "Party to a prohibited tax shelter transaction?" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + description: "Reason for non-PF status" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + description: "Number of organizations supported" + mode: "nullable" + - name: "totsupport" + type: "integer" + description: "Sum of amounts of support" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + description: "Gifts grants membership fees received (170)" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + description: "Tax revenues levied (170)" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + description: "Services or facilities furnished by gov (170)" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + description: "Public support subtotal (170)" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + description: "Amount support exceeds total (170)" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + description: "Public support (170)" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + description: "Public support from line 4 (170)" + mode: "nullable" + - name: "grsinc170" + type: "integer" + description: "Gross income from interest etc (170)" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + description: "Net UBI (170)" + mode: "nullable" + - name: 
"othrinc170" + type: "integer" + description: "Other income (170)" + mode: "nullable" + - name: "totsupp170" + type: "integer" + description: "Total support (170)" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + description: "Gross receipts from related activities (170)" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + description: "Gifts grants membership fees received (509)" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + description: "Receipts from admissions merchandise etc (509)" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + description: "Gross receipts from related activities (509)" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + description: "Tax revenues levied (509)" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + description: "Services or facilities furnished by gov (509)" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + description: "Amounts from disqualified persons (509)" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + description: "Amount support exceeds total (509)" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + description: "Public support (509)" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + description: "Public support from line 6 (509)" + mode: "nullable" + - name: "grsinc509" + type: "integer" + description: "Gross income from interest etc (509)" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + description: "Net UBI (509)" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + description: "Subtotal total support (509)" + mode: "nullable" + - name: "netincunrelatd509" + type: 
"integer" + description: "Net income from UBI not in 10b (509)" + mode: "nullable" + - name: "othrinc509" + type: "integer" + description: "Other income (509)" + mode: "nullable" + - name: "totsupp509" + type: "integer" + description: "Total support (509)" + mode: "nullable" + + + graph_paths: + - "irs_990_ez_2014_transform_csv >> load_irs_990_ez_2014_to_bq" diff --git a/datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py b/datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py new file mode 100644 index 000000000..6e9e3cdb1 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py @@ -0,0 +1,501 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2015", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2015_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2015_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2015", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextractEZ.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2015/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2015", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2015_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2015_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2015/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2015", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + 
"type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { + "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": 
"Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + "description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": 
"srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2015_transform_csv >> load_irs_990_ez_2015_to_bq diff --git a/datasets/irs_990/irs_990_ez_2015/pipeline.yaml b/datasets/irs_990/irs_990_ez_2015/pipeline.yaml new file mode 100644 index 000000000..aeb6fccce --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2015/pipeline.yaml @@ -0,0 +1,401 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2015 + + # Description of the table + description: "IRS 990 EZ 2015 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2015 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2015_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2015" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. 
+ image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to run. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/15eofinextractEZ.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2015/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2015" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": 
"grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": 
"samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2015_to_bq" + + # The GCS bucket where the CSV file is located. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2015/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2015" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_pd" + type: "integer" + description: "Tax period" + mode: "nullable" + - name: "subseccd" + type: "integer" + description: "Subsection code" + mode: "nullable" + - name: "totcntrbs" + type: "integer" + description: "Contributions gifts grants etc received" + mode: "nullable" + - name: "prgmservrev" + type: "integer" + description: "Program service revenue" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" + description: "Membership dues and assessments" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" + description: "Investment income" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" + description: "Gross amount from sale of assets" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" + description: "Cost or other basis and sales expenses" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" + description: "Gain or (loss) from sale of assets" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + description: "Gross income from gaming" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" + description: "Special events gross revenue" + mode: "nullable" + - name: "direxpns" + type: "integer" + description: "Special events direct expenses" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + description: "Special events net income (or loss)" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" + description: "Gross sales of inventory" + mode: "nullable" + - name: "costgoodsold" + type: "integer" + description: "Less: cost of goods sold" + mode: "nullable" + - name: "grsprft" + type: "integer" + description: "Gross profit (or loss) from sales of inventory" + mode: "nullable" + - name: "othrevnue" + type: "integer" + description: "Other 
revenue - total" + mode: "nullable" + - name: "totrevnue" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "totexpns" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "totexcessyr" + type: "integer" + description: "Excess or deficit" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" + description: "Other changes in net assets" + mode: "nullable" + - name: "networthend" + type: "integer" + description: "Net assets EOY" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets e-o-y" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities e-o-y" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" + description: "Total net worth e-o-y" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" + description: "Activity not previously reported?" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" + description: "Significant changes to governing docs?" + mode: "nullable" + - name: "unrelbusincd" + type: "string" + description: "UBI over $1000?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Organization Filed 990T" + mode: "nullable" + - name: "contractioncd" + type: "string" + description: "Liquidation dissolution termination or contraction" + mode: "nullable" + - name: "politicalexpend" + type: "integer" + description: "Direct or indirect political expenditures" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" + description: "File Form 1120-POL?" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" + description: "Loans to/from officers directors or trustees?" 
+ mode: "nullable" + - name: "loanstoofficers" + type: "integer" + description: "Amount of loans to/from officers" + mode: "nullable" + - name: "initiationfee" + type: "integer" + description: "Initiation fees and capital contributions" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" + description: "Gross receipts for public use of club facilities" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" + description: "Section 4958 excess benefit transactions?" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + description: "Party to a prohibited tax shelter transaction?" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + description: "Reason for non-PF status" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + description: "Number of organizations supported" + mode: "nullable" + - name: "totsupport" + type: "integer" + description: "Sum of amounts of support" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + description: "Gifts grants membership fees received (170)" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + description: "Tax revenues levied (170)" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + description: "Services or facilities furnished by gov (170)" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + description: "Public support subtotal (170)" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + description: "Amount support exceeds total (170)" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + description: "Public support (170)" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + description: "Public support from line 4 (170)" + mode: "nullable" + - name: "grsinc170" + type: "integer" + description: "Gross income from interest etc (170)" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + description: "Net UBI (170)" + mode: "nullable" + - name: 
"othrinc170" + type: "integer" + description: "Other income (170)" + mode: "nullable" + - name: "totsupp170" + type: "integer" + description: "Total support (170)" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + description: "Gross receipts from related activities (170)" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + description: "Gifts grants membership fees received (509)" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + description: "Receipts from admissions merchandise etc (509)" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + description: "Gross receipts from related activities (509)" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + description: "Tax revenues levied (509)" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + description: "Services or facilities furnished by gov (509)" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + description: "Amounts from disqualified persons (509)" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + description: "Amount support exceeds total (509)" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + description: "Public support (509)" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + description: "Public support from line 6 (509)" + mode: "nullable" + - name: "grsinc509" + type: "integer" + description: "Gross income from interest etc (509)" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + description: "Net UBI (509)" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + description: "Subtotal total support (509)" + mode: "nullable" + - name: "netincunrelatd509" + type: 
"integer" + description: "Net income from UBI not in 10b (509)" + mode: "nullable" + - name: "othrinc509" + type: "integer" + description: "Other income (509)" + mode: "nullable" + - name: "totsupp509" + type: "integer" + description: "Total support (509)" + mode: "nullable" + + + graph_paths: + - "irs_990_ez_2015_transform_csv >> load_irs_990_ez_2015_to_bq" diff --git a/datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py b/datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py new file mode 100644 index 000000000..f741c65a1 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py @@ -0,0 +1,501 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2016", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2016_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2016_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2016", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextractez.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2016/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2016", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2016_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2016_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2016/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2016", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + 
"type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { + "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": 
"Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + "description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": 
"srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2016_transform_csv >> load_irs_990_ez_2016_to_bq diff --git a/datasets/irs_990/irs_990_ez_2016/pipeline.yaml b/datasets/irs_990/irs_990_ez_2016/pipeline.yaml new file mode 100644 index 000000000..3005403da --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2016/pipeline.yaml @@ -0,0 +1,401 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2016 + + # Description of the table + description: "IRS 990 EZ 2016 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2016 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2016_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2016" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. 
+ image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/16eofinextractez.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2016/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2016" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": 
"grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": 
"samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource requests for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2016_to_bq" + + # The GCS bucket where the CSV file is located. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2016/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2016" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_pd" + type: "integer" + description: "Tax period" + mode: "nullable" + - name: "subseccd" + type: "integer" + description: "Subsection code" + mode: "nullable" + - name: "totcntrbs" + type: "integer" + description: "Contributions gifts grants etc received" + mode: "nullable" + - name: "prgmservrev" + type: "integer" + description: "Program service revenue" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" + description: "Membership dues and assessments" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" + description: "Investment income" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" + description: "Gross amount from sale of assets" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" + description: "Cost or other basis and sales expenses" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" + description: "Gain or (loss) from sale of assets" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + description: "Gross income from gaming" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" + description: "Special events gross revenue" + mode: "nullable" + - name: "direxpns" + type: "integer" + description: "Special events direct expenses" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + description: "Special events net income (or loss)" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" + description: "Gross sales of inventory" + mode: "nullable" + - name: "costgoodsold" + type: "integer" + description: "Less: cost of goods sold" + mode: "nullable" + - name: "grsprft" + type: "integer" + description: "Gross profit (or loss) from sales of inventory" + mode: "nullable" + - name: "othrevnue" + type: "integer" + description: "Other 
revenue - total" + mode: "nullable" + - name: "totrevnue" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "totexpns" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "totexcessyr" + type: "integer" + description: "Excess or deficit" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" + description: "Other changes in net assets" + mode: "nullable" + - name: "networthend" + type: "integer" + description: "Net assets EOY" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets e-o-y" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities e-o-y" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" + description: "Total net worth e-o-y" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" + description: "Activity not previously reported?" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" + description: "Significant changes to governing docs?" + mode: "nullable" + - name: "unrelbusincd" + type: "string" + description: "UBI over $1000?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Organization Filed 990T" + mode: "nullable" + - name: "contractioncd" + type: "string" + description: "Liquidation dissolution termination or contraction" + mode: "nullable" + - name: "politicalexpend" + type: "integer" + description: "Direct or indirect political expenditures" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" + description: "File Form 1120-POL?" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" + description: "Loans to/from officers directors or trustees?" 
+ mode: "nullable" + - name: "loanstoofficers" + type: "integer" + description: "Amount of loans to/from officers" + mode: "nullable" + - name: "initiationfee" + type: "integer" + description: "Initiation fees and capital contributions" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" + description: "Gross receipts for public use of club facilities" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" + description: "Section 4958 excess benefit transactions?" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + description: "Party to a prohibited tax shelter transaction?" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + description: "Reason for non-PF status" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + description: "Number of organizations supported" + mode: "nullable" + - name: "totsupport" + type: "integer" + description: "Sum of amounts of support" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + description: "Gifts grants membership fees received (170)" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + description: "Tax revenues levied (170)" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + description: "Services or facilities furnished by gov (170)" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + description: "Public support subtotal (170)" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + description: "Amount support exceeds total (170)" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + description: "Public support (170)" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + description: "Public support from line 4 (170)" + mode: "nullable" + - name: "grsinc170" + type: "integer" + description: "Gross income from interest etc (170)" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + description: "Net UBI (170)" + mode: "nullable" + - name: 
"othrinc170" + type: "integer" + description: "Other income (170)" + mode: "nullable" + - name: "totsupp170" + type: "integer" + description: "Total support (170)" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + description: "Gross receipts from related activities (170)" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + description: "Gifts grants membership fees received (509)" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + description: "Receipts from admissions merchandise etc (509)" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + description: "Gross receipts from related activities (509)" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + description: "Tax revenues levied (509)" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + description: "Services or facilities furnished by gov (509)" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + description: "Amounts from disqualified persons (509)" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + description: "Amount support exceeds total (509)" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + description: "Public support (509)" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + description: "Public support from line 6 (509)" + mode: "nullable" + - name: "grsinc509" + type: "integer" + description: "Gross income from interest etc (509)" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + description: "Net UBI (509)" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + description: "Subtotal total support (509)" + mode: "nullable" + - name: "netincunrelatd509" + type: 
"integer" + description: "Net income from UBI not in 10b (509)" + mode: "nullable" + - name: "othrinc509" + type: "integer" + description: "Other income (509)" + mode: "nullable" + - name: "totsupp509" + type: "integer" + description: "Total support (509)" + mode: "nullable" + + + graph_paths: + - "irs_990_ez_2016_transform_csv >> load_irs_990_ez_2016_to_bq" diff --git a/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py b/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py new file mode 100644 index 000000000..7c7037845 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py @@ -0,0 +1,501 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2017", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2017_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2017_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2017", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/17eofinextractEZ.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2017/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2017", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2017_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2017_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2017/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2017", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + 
"type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { + "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": 
"Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + "description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": 
"srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2017_transform_csv >> load_irs_990_ez_2017_to_bq diff --git a/datasets/irs_990/irs_990_ez_2017/pipeline.yaml b/datasets/irs_990/irs_990_ez_2017/pipeline.yaml new file mode 100644 index 000000000..7cb823482 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2017/pipeline.yaml @@ -0,0 +1,401 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2017 + + # Description of the table + description: "IRS 990 EZ 2017 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2017 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2017_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2017" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. 
+ image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/17eofinextractEZ.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2017/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2017" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": 
"grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": 
"samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2017_to_bq" + + # The GCS bucket where the CSV file is located in. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2017/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2017" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_pd" + type: "integer" + description: "Tax period" + mode: "nullable" + - name: "subseccd" + type: "integer" + description: "Subsection code" + mode: "nullable" + - name: "totcntrbs" + type: "integer" + description: "Contributions gifts grants etc received" + mode: "nullable" + - name: "prgmservrev" + type: "integer" + description: "Program service revenue" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" + description: "Membership dues and assessments" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" + description: "Investment income" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" + description: "Gross amount from sale of assets" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" + description: "Cost or other basis and sales expenses" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" + description: "Gain or (loss) from sale of assets" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + description: "Gross income from gaming" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" + description: "Special events gross revenue" + mode: "nullable" + - name: "direxpns" + type: "integer" + description: "Special events direct expenses" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + description: "Special events net income (or loss)" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" + description: "Gross sales of inventory" + mode: "nullable" + - name: "costgoodsold" + type: "integer" + description: "Less: cost of goods sold" + mode: "nullable" + - name: "grsprft" + type: "integer" + description: "Gross profit (or loss) from sales of inventory" + mode: "nullable" + - name: "othrevnue" + type: "integer" + description: "Other 
revenue - total" + mode: "nullable" + - name: "totrevnue" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "totexpns" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "totexcessyr" + type: "integer" + description: "Excess or deficit" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" + description: "Other changes in net assets" + mode: "nullable" + - name: "networthend" + type: "integer" + description: "Net assets EOY" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets e-o-y" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities e-o-y" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" + description: "Total net worth e-o-y" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" + description: "Activity not previously reported?" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" + description: "Significant changes to governing docs?" + mode: "nullable" + - name: "unrelbusincd" + type: "string" + description: "UBI over $1000?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Organization Filed 990T" + mode: "nullable" + - name: "contractioncd" + type: "string" + description: "Liquidation dissolution termination or contraction" + mode: "nullable" + - name: "politicalexpend" + type: "integer" + description: "Direct or indirect political expenditures" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" + description: "File Form 1120-POL?" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" + description: "Loans to/from officers directors or trustees?" 
+ mode: "nullable" + - name: "loanstoofficers" + type: "integer" + description: "Amount of loans to/from officers" + mode: "nullable" + - name: "initiationfee" + type: "integer" + description: "Initiation fees and capital contributions" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" + description: "Gross receipts for public use of club facilities" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" + description: "Section 4958 excess benefit transactions?" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + description: "Party to a prohibited tax shelter transaction?" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + description: "Reason for non-PF status" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + description: "Number of organizations supported" + mode: "nullable" + - name: "totsupport" + type: "integer" + description: "Sum of amounts of support" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + description: "Gifts grants membership fees received (170)" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + description: "Tax revenues levied (170)" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + description: "Services or facilities furnished by gov (170)" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + description: "Public support subtotal (170)" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + description: "Amount support exceeds total (170)" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + description: "Public support (170)" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + description: "Public support from line 4 (170)" + mode: "nullable" + - name: "grsinc170" + type: "integer" + description: "Gross income from interest etc (170)" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + description: "Net UBI (170)" + mode: "nullable" + - name: 
"othrinc170" + type: "integer" + description: "Other income (170)" + mode: "nullable" + - name: "totsupp170" + type: "integer" + description: "Total support (170)" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + description: "Gross receipts from related activities (170)" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + description: "Gifts grants membership fees received (509)" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + description: "Receipts from admissions merchandise etc (509)" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + description: "Gross receipts from related activities (509)" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + description: "Tax revenues levied (509)" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + description: "Services or facilities furnished by gov (509)" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + description: "Amounts from disqualified persons (509)" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + description: "Amount support exceeds total (509)" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + description: "Public support subtotal (509)" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + description: "Public support (509)" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + description: "Public support from line 6 (509)" + mode: "nullable" + - name: "grsinc509" + type: "integer" + description: "Gross income from interest etc (509)" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + description: "Net UBI (509)" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + description: "Subtotal total support (509)" + mode: "nullable" + - name: "netincunrelatd509" + type: 
"integer" + description: "Net income from UBI not in 10b (509)" + mode: "nullable" + - name: "othrinc509" + type: "integer" + description: "Other income (509)" + mode: "nullable" + - name: "totsupp509" + type: "integer" + description: "Total support (509)" + mode: "nullable" + + + graph_paths: + - "irs_990_ez_2017_transform_csv >> load_irs_990_ez_2017_to_bq" diff --git a/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py b/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py new file mode 100644 index 000000000..189cd7382 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py @@ -0,0 +1,1137 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_pf_2014", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_pf_2014_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_pf_2014_transform_csv", + startup_timeout_seconds=600, + name="irs_990_pf_2014", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990pf.zip", + "SOURCE_FILE": "files/data.zip", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_pf_2014/data_output.csv", + "PIPELINE_NAME": "irs_990_pf_2014", + "CSV_HEADERS": 
'["ein","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","acqdrindrintcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsu
prttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"]', + "RENAME_MAPPINGS": '{"EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": 
"mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","ACQDRINDRINTCD": "acqdrindrintcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": 
"adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": 
"rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_pf_2014_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_pf_2014_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_pf_2014/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_pf_2014", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "tax_prd", + "type": "string", + "description": "Tax period (YYYYMM format)", + "mode": "nullable", + }, + { + "name": "eostatus", + "type": "string", + "description": "EO Status Code", + "mode": "nullable", + }, + { + "name": "tax_yr", + "type": "integer", + "description": "SOI Year", + "mode": "nullable", + }, + { + "name": "operatingcd", + "type": "string", + "description": "Operating foundation code", + "mode": "nullable", + }, + { + "name": "subcd", + "type": "string", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "fairmrktvalamt", + "type": "integer", + "description": "Total assets – e-o-y fair market valu", + "mode": "nullable", + }, + { + "name": "grscontrgifts", + "type": "integer", + "description": "Contributions received", + "mode": "nullable", + }, + { + "name": "schedbind", + "type": "string", + "description": "Schedule B indicator", + "mode": "nullable", + }, + { + "name": "intrstrvnue", + "type": "integer", + "description": "Interest revenue", + "mode": "nullable", + }, + { + "name": "dividndsamt", + "type": "integer", + "description": "", + "mode": "nullable", + }, + { + "name": "grsrents", + "type": "integer", + "description": "Gross 
rents", + "mode": "nullable", + }, + { + "name": "grsslspramt", + "type": "integer", + "description": "Gross sales price for assets", + "mode": "nullable", + }, + { + "name": "costsold", + "type": "integer", + "description": "Cost-of-goods-sold", + "mode": "nullable", + }, + { + "name": "grsprofitbus", + "type": "integer", + "description": "Gross profit", + "mode": "nullable", + }, + { + "name": "otherincamt", + "type": "integer", + "description": "Other income", + "mode": "nullable", + }, + { + "name": "totrcptperbks", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "compofficers", + "type": "integer", + "description": "Compensation of officers", + "mode": "nullable", + }, + { + "name": "pensplemplbenf", + "type": "integer", + "description": "Pension plans employee benefits", + "mode": "nullable", + }, + { + "name": "legalfeesamt", + "type": "integer", + "description": "Legal fees", + "mode": "nullable", + }, + { + "name": "accountingfees", + "type": "integer", + "description": "Accounting fees", + "mode": "nullable", + }, + { + "name": "interestamt", + "type": "integer", + "description": "Interest", + "mode": "nullable", + }, + { + "name": "depreciationamt", + "type": "integer", + "description": "Depreciation and depletion", + "mode": "nullable", + }, + { + "name": "occupancyamt", + "type": "integer", + "description": "Occupancy", + "mode": "nullable", + }, + { + "name": "travlconfmtngs", + "type": "integer", + "description": "Travel conferences and meetings", + "mode": "nullable", + }, + { + "name": "printingpubl", + "type": "integer", + "description": "Printing and publications", + "mode": "nullable", + }, + { + "name": "topradmnexpnsa", + "type": "integer", + "description": "Total operating and administrative expenses column a", + "mode": "nullable", + }, + { + "name": "contrpdpbks", + "type": "integer", + "description": "Contributions gifts grants paid", + "mode": "nullable", + }, + { + "name": "totexpnspbks", + 
"type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "excessrcpts", + "type": "integer", + "description": "Net income less deficit", + "mode": "nullable", + }, + { + "name": "totrcptnetinc", + "type": "integer", + "description": "Total receipts net investment income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsb", + "type": "integer", + "description": "Total operating and administrative expenses column b", + "mode": "nullable", + }, + { + "name": "totexpnsnetinc", + "type": "integer", + "description": "Total expenses net investment income", + "mode": "nullable", + }, + { + "name": "netinvstinc", + "type": "integer", + "description": "Net investment income", + "mode": "nullable", + }, + { + "name": "trcptadjnetinc", + "type": "integer", + "description": "Total receipts adjusted net income", + "mode": "nullable", + }, + { + "name": "totexpnsadjnet", + "type": "integer", + "description": "Total expenses adjusted net income", + "mode": "nullable", + }, + { + "name": "adjnetinc", + "type": "integer", + "description": "Adjusted net income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsd", + "type": "integer", + "description": "Total operating and administrative expenses column d", + "mode": "nullable", + }, + { + "name": "totexpnsexempt", + "type": "integer", + "description": "Total expenses – exempt purpose", + "mode": "nullable", + }, + { + "name": "othrcashamt", + "type": "integer", + "description": "Cash non-interest-bearing – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstgovtoblig", + "type": "integer", + "description": "Investments in U.S. 
& state government obligations – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpstk", + "type": "integer", + "description": "Investments in corporate stock – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpbnd", + "type": "integer", + "description": "Investments in corporate bonds – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totinvstsec", + "type": "integer", + "description": "Total investments in securities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgloans", + "type": "integer", + "description": "Investments mortgage loans – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrinvstend", + "type": "integer", + "description": "Other investments – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrassetseoy", + "type": "integer", + "description": "Other assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgnotespay", + "type": "integer", + "description": "Mortgage loans payable – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrliabltseoy", + "type": "integer", + "description": "Other liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "tfundnworth", + "type": "integer", + "description": "Total fund net worth – e-o-y book value", + "mode": "nullable", + }, + { + "name": "fairmrktvaleoy", + "type": "integer", + "description": "Total assets – e-o-y fair market value", + "mode": "nullable", + }, + { + "name": "totexcapgnls", + "type": "integer", + "description": "Capital gain net income", + "mode": "nullable", + }, + { + "name": "totexcapgn", + "type": "integer", + "description": "Net gain – sales of assets", + "mode": "nullable", + }, + { + 
"name": "totexcapls", + "type": "integer", + "description": "Net loss – sales of assets", + "mode": "nullable", + }, + { + "name": "invstexcisetx", + "type": "integer", + "description": "Excise tax on net investment income", + "mode": "nullable", + }, + { + "name": "sec4940notxcd", + "type": "string", + "description": "Section 4940 – no tax", + "mode": "nullable", + }, + { + "name": "sec4940redtxcd", + "type": "string", + "description": "Section 4940 – 1 % tax", + "mode": "nullable", + }, + { + "name": "sect511tx", + "type": "integer", + "description": "Section 511 tax", + "mode": "nullable", + }, + { + "name": "subtitleatx", + "type": "integer", + "description": "Subtitle A tax", + "mode": "nullable", + }, + { + "name": "totaxpyr", + "type": "integer", + "description": "Total excise tax", + "mode": "nullable", + }, + { + "name": "esttaxcr", + "type": "integer", + "description": "Estimated tax credit", + "mode": "nullable", + }, + { + "name": "txwithldsrc", + "type": "integer", + "description": "Tax withheld at source", + "mode": "nullable", + }, + { + "name": "txpaidf2758", + "type": "integer", + "description": "Tax paid with Form 2758 (filing extension)", + "mode": "nullable", + }, + { + "name": "erronbkupwthld", + "type": "integer", + "description": "Erroneous backup withholding credit amount", + "mode": "nullable", + }, + { + "name": "estpnlty", + "type": "integer", + "description": "Estimated tax penalty", + "mode": "nullable", + }, + { + "name": "taxdue", + "type": "integer", + "description": "Tax due", + "mode": "nullable", + }, + { + "name": "overpay", + "type": "integer", + "description": "Overpayment", + "mode": "nullable", + }, + { + "name": "crelamt", + "type": "integer", + "description": "Credit elect amount", + "mode": "nullable", + }, + { + "name": "infleg", + "type": "string", + "description": "Influence legislation?", + "mode": "nullable", + }, + { + "name": "actnotpr", + "type": "string", + "description": "Activities not previously reported?", + 
"mode": "nullable", + }, + { + "name": "chgnprvrptcd", + "type": "string", + "description": "Changes not previously reported?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Filed 990-T?", + "mode": "nullable", + }, + { + "name": "contractncd", + "type": "string", + "description": "Contraction?", + "mode": "nullable", + }, + { + "name": "furnishcpycd", + "type": "string", + "description": "Furnished copy to Attorney General?", + "mode": "nullable", + }, + { + "name": "claimstatcd", + "type": "string", + "description": "Claiming status?", + "mode": "nullable", + }, + { + "name": "cntrbtrstxyrcd", + "type": "string", + "description": "Substantial contributors?", + "mode": "nullable", + }, + { + "name": "acqdrindrintcd", + "type": "string", + "description": "Distribution to donor advised fund with advisory privileges?", + "mode": "nullable", + }, + { + "name": "orgcmplypubcd", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "filedlf1041ind", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "propexchcd", + "type": "string", + "description": "Property exchange?", + "mode": "nullable", + }, + { + "name": "brwlndmnycd", + "type": "string", + "description": "Borrow lend money?", + "mode": "nullable", + }, + { + "name": "furngoodscd", + "type": "string", + "description": "Furnished goods?", + "mode": "nullable", + }, + { + "name": "paidcmpncd", + "type": "string", + "description": "Paid compensation?", + "mode": "nullable", + }, + { + "name": "transfercd", + "type": "string", + "description": "Transfer?", + "mode": "nullable", + }, + { + "name": "agremkpaycd", + "type": "string", + "description": "Agree to make pay?", + "mode": "nullable", + }, + { + "name": "exceptactsind", + "type": "string", + "description": "Acts fail to qualify under section 53.4941(d)-3?", + "mode": "nullable", + }, + { + 
"name": "prioractvcd", + "type": "string", + "description": "Engage in acts in prior year?", + "mode": "nullable", + }, + { + "name": "undistrinccd", + "type": "string", + "description": "Undistributed income?", + "mode": "nullable", + }, + { + "name": "applyprovind", + "type": "string", + "description": "Not applying section 4942(a)(2) provisions?", + "mode": "nullable", + }, + { + "name": "dirindirintcd", + "type": "string", + "description": "Direct indirect interest?", + "mode": "nullable", + }, + { + "name": "excesshldcd", + "type": "string", + "description": "Excess business holdings?", + "mode": "nullable", + }, + { + "name": "invstjexmptcd", + "type": "string", + "description": "Jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "prevjexmptcd", + "type": "string", + "description": "Prior year jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "propgndacd", + "type": "string", + "description": "Propaganda?", + "mode": "nullable", + }, + { + "name": "ipubelectcd", + "type": "string", + "description": "Influence public election?", + "mode": "nullable", + }, + { + "name": "grntindivcd", + "type": "string", + "description": "Grant individual?", + "mode": "nullable", + }, + { + "name": "nchrtygrntcd", + "type": "string", + "description": "Non-charity grant?", + "mode": "nullable", + }, + { + "name": "nreligiouscd", + "type": "string", + "description": "Non-religious?", + "mode": "nullable", + }, + { + "name": "excptransind", + "type": "string", + "description": "Transactions fail to qualify under section 53.4945?", + "mode": "nullable", + }, + { + "name": "rfprsnlbnftind", + "type": "string", + "description": "Receive funds to pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "pyprsnlbnftind", + "type": "string", + "description": "Pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "tfairmrktunuse", + "type": "integer", + "description": "Fair market value of 
assets not used for charitable purposes", + "mode": "nullable", + }, + { + "name": "valncharitassets", + "type": "integer", + "description": "Net value of noncharitable-use assets", + "mode": "nullable", + }, + { + "name": "cmpmininvstret", + "type": "integer", + "description": "Minimum investment return", + "mode": "nullable", + }, + { + "name": "distribamt", + "type": "integer", + "description": "Distributable amount", + "mode": "nullable", + }, + { + "name": "undistribincyr", + "type": "integer", + "description": "Undistributed income", + "mode": "nullable", + }, + { + "name": "adjnetinccola", + "type": "integer", + "description": "Adjusted net income column a", + "mode": "nullable", + }, + { + "name": "adjnetinccolb", + "type": "integer", + "description": "Adjusted net income column b", + "mode": "nullable", + }, + { + "name": "adjnetinccolc", + "type": "integer", + "description": "Adjusted net income column c", + "mode": "nullable", + }, + { + "name": "adjnetinccold", + "type": "integer", + "description": "Adjusted net income column d", + "mode": "nullable", + }, + { + "name": "adjnetinctot", + "type": "integer", + "description": "Adjusted net income total", + "mode": "nullable", + }, + { + "name": "qlfydistriba", + "type": "integer", + "description": "Qualifying distributions column a", + "mode": "nullable", + }, + { + "name": "qlfydistribb", + "type": "integer", + "description": "Qualifying distributions column b", + "mode": "nullable", + }, + { + "name": "qlfydistribc", + "type": "integer", + "description": "Qualifying distributions column c", + "mode": "nullable", + }, + { + "name": "qlfydistribd", + "type": "integer", + "description": "Qualifying distributions column d", + "mode": "nullable", + }, + { + "name": "qlfydistribtot", + "type": "integer", + "description": "Qualifying distributions total", + "mode": "nullable", + }, + { + "name": "valassetscola", + "type": "integer", + "description": "Value assets column a", + "mode": "nullable", + }, + { + 
"name": "valassetscolb", + "type": "integer", + "description": "Value assets column b", + "mode": "nullable", + }, + { + "name": "valassetscolc", + "type": "integer", + "description": "Value assets column c", + "mode": "nullable", + }, + { + "name": "valassetscold", + "type": "integer", + "description": "Value assets column d", + "mode": "nullable", + }, + { + "name": "valassetstot", + "type": "integer", + "description": "Value assets total", + "mode": "nullable", + }, + { + "name": "qlfyasseta", + "type": "integer", + "description": "Qualifying assets column a", + "mode": "nullable", + }, + { + "name": "qlfyassetb", + "type": "integer", + "description": "Qualifying assets column b", + "mode": "nullable", + }, + { + "name": "qlfyassetc", + "type": "integer", + "description": "Qualifying assets column c", + "mode": "nullable", + }, + { + "name": "qlfyassetd", + "type": "integer", + "description": "Qualifying assets column d", + "mode": "nullable", + }, + { + "name": "qlfyassettot", + "type": "integer", + "description": "Qualifying assets total", + "mode": "nullable", + }, + { + "name": "endwmntscola", + "type": "integer", + "description": "Endowments column a", + "mode": "nullable", + }, + { + "name": "endwmntscolb", + "type": "integer", + "description": "Endowments column b", + "mode": "nullable", + }, + { + "name": "endwmntscolc", + "type": "integer", + "description": "Endowments column c", + "mode": "nullable", + }, + { + "name": "endwmntscold", + "type": "integer", + "description": "Endowments column d", + "mode": "nullable", + }, + { + "name": "endwmntstot", + "type": "integer", + "description": "Endowments total", + "mode": "nullable", + }, + { + "name": "totsuprtcola", + "type": "integer", + "description": "Total support column a", + "mode": "nullable", + }, + { + "name": "totsuprtcolb", + "type": "integer", + "description": "Total support column b", + "mode": "nullable", + }, + { + "name": "totsuprtcolc", + "type": "integer", + "description": "Total support 
column c", + "mode": "nullable", + }, + { + "name": "totsuprtcold", + "type": "integer", + "description": "Total support column d", + "mode": "nullable", + }, + { + "name": "totsuprttot", + "type": "integer", + "description": "Total support total", + "mode": "nullable", + }, + { + "name": "pubsuprtcola", + "type": "integer", + "description": "Public support column a", + "mode": "nullable", + }, + { + "name": "pubsuprtcolb", + "type": "integer", + "description": "Public support column b", + "mode": "nullable", + }, + { + "name": "pubsuprtcolc", + "type": "integer", + "description": "Public support column c", + "mode": "nullable", + }, + { + "name": "pubsuprtcold", + "type": "integer", + "description": "Public support column d", + "mode": "nullable", + }, + { + "name": "pubsuprttot", + "type": "integer", + "description": "Public support total", + "mode": "nullable", + }, + { + "name": "grsinvstinca", + "type": "integer", + "description": "Gross investment income column a", + "mode": "nullable", + }, + { + "name": "grsinvstincb", + "type": "integer", + "description": "Gross investment income column b", + "mode": "nullable", + }, + { + "name": "grsinvstincc", + "type": "integer", + "description": "Gross investment income column c", + "mode": "nullable", + }, + { + "name": "grsinvstincd", + "type": "integer", + "description": "Gross investment income column d", + "mode": "nullable", + }, + { + "name": "grsinvstinctot", + "type": "integer", + "description": "Gross investment income total", + "mode": "nullable", + }, + { + "name": "grntapprvfut", + "type": "integer", + "description": "Grants approved for future payment", + "mode": "nullable", + }, + { + "name": "progsrvcacold", + "type": "integer", + "description": "Program service revenue line 1a (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcacole", + "type": "integer", + "description": "Program service revenue line 1a (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcbcold", + "type": 
"integer", + "description": "Program service revenue line 1b (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcbcole", + "type": "integer", + "description": "Program service revenue line 1b (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcccold", + "type": "integer", + "description": "Program service revenue line 1c (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcccole", + "type": "integer", + "description": "Program service revenue line 1c (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcdcold", + "type": "integer", + "description": "Program service revenue line 1d (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcdcole", + "type": "integer", + "description": "Program service revenue line 1d (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcecold", + "type": "integer", + "description": "Program service revenue line 1e (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcecole", + "type": "integer", + "description": "Program service revenue line 1e (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcfcold", + "type": "integer", + "description": "Program service revenue line 1f (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcfcole", + "type": "integer", + "description": "Program service revenue line 1f (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcgcold", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcgcole", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (exempt)", + "mode": "nullable", + }, + { + "name": "membershpduesd", + "type": "integer", + "description": "Membership dues and assessments (excluded)", + "mode": "nullable", + }, + { + "name": "membershpduese", + "type": "integer", + "description": "Membership dues and assessments (exempt)", + "mode": 
"nullable", + }, + { + "name": "intonsvngsd", + "type": "integer", + "description": "Interest on savings and temporary cash investments (excluded)", + "mode": "nullable", + }, + { + "name": "intonsvngse", + "type": "integer", + "description": "Interest on savings and temporary cash investments (exempt)", + "mode": "nullable", + }, + { + "name": "dvdndsintd", + "type": "integer", + "description": "Dividends and interest from securities (excluded)", + "mode": "nullable", + }, + { + "name": "dvdndsinte", + "type": "integer", + "description": "Dividends and interest from securities (exempt)", + "mode": "nullable", + }, + { + "name": "trnsfrcashcd", + "type": "string", + "description": "Transfer cash to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "trnsothasstscd", + "type": "string", + "description": "Transfer other assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "salesasstscd", + "type": "string", + "description": "Sale of assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "prchsasstscd", + "type": "string", + "description": "Purchase of assets from noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "rentlsfacltscd", + "type": "string", + "description": "Rental of facilities or other assets?", + "mode": "nullable", + }, + { + "name": "reimbrsmntscd", + "type": "string", + "description": "Reimbursements arrangements?", + "mode": "nullable", + }, + { + "name": "loansguarcd", + "type": "string", + "description": "Loans or other guarantees?", + "mode": "nullable", + }, + { + "name": "perfservicescd", + "type": "string", + "description": "Performance of services or membership or fundraising solicitations?", + "mode": "nullable", + }, + { + "name": "sharngasstscd", + "type": "string", + "description": "Sharing of facilities equipment mailing lists other assets or paid employees?", + "mode": "nullable", + }, + ], + ) + + 
irs_990_pf_2014_transform_csv >> load_irs_990_pf_2014_to_bq diff --git a/datasets/irs_990/irs_990_pf_2014/pipeline.yaml b/datasets/irs_990/irs_990_pf_2014/pipeline.yaml new file mode 100644 index 000000000..a40eb889d --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2014/pipeline.yaml @@ -0,0 +1,823 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_pf_2014 + + # Description of the table + description: "IRS 990 PF 2014 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_pf_2014 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_pf_2014_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_pf_2014" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. 
use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/14eofinextract990pf.zip" + SOURCE_FILE: "files/data.zip" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_pf_2014/data_output.csv" + PIPELINE_NAME: "irs_990_pf_2014" + CSV_HEADERS: >- + ["ein","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","acqdr
indrintcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"] + RENAME_MAPPINGS: >- + {"EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": 
"totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","ACQDRINDRINTCD": "acqdrindrintcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": 
"brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": 
"grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"} + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_pf_2014_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_pf_2014/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_pf_2014" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "tax_prd" + type: "string" + description: "Tax period (YYYYMM format)" + mode: "nullable" + - name: "eostatus" + type: "string" + description: "EO Status Code" + mode: "nullable" + - name: "tax_yr" + type: "integer" + description: "SOI Year" + mode: "nullable" + - name: "operatingcd" + type: "string" + description: "Operating foundation code" + mode: "nullable" + - name: "subcd" + type: "string" + description: "Subsection code" + mode: "nullable" + - name: "fairmrktvalamt" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "grscontrgifts" + type: "integer" + description: "Contributions received" + mode: "nullable" + - name: "schedbind" + type: "string" + description: "Schedule B indicator" + mode: "nullable" + - name: "intrstrvnue" + type: "integer" + description: "Interest revenue" + mode: "nullable" + - name: "dividndsamt" + type: "integer" + description: "Dividends" + mode: "nullable" + - name: "grsrents" + type: "integer" + description: "Gross rents" + mode: "nullable" + - name: "grsslspramt" + type: "integer" + description: 
"Gross sales price for assets" + mode: "nullable" + - name: "costsold" + type: "integer" + description: "Cost-of-goods-sold" + mode: "nullable" + - name: "grsprofitbus" + type: "integer" + description: "Gross profit" + mode: "nullable" + - name: "otherincamt" + type: "integer" + description: "Other income" + mode: "nullable" + - name: "totrcptperbks" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "compofficers" + type: "integer" + description: "Compensation of officers" + mode: "nullable" + - name: "pensplemplbenf" + type: "integer" + description: "Pension plans employee benefits" + mode: "nullable" + - name: "legalfeesamt" + type: "integer" + description: "Legal fees" + mode: "nullable" + - name: "accountingfees" + type: "integer" + description: "Accounting fees" + mode: "nullable" + - name: "interestamt" + type: "integer" + description: "Interest" + mode: "nullable" + - name: "depreciationamt" + type: "integer" + description: "Depreciation and depletion" + mode: "nullable" + - name: "occupancyamt" + type: "integer" + description: "Occupancy" + mode: "nullable" + - name: "travlconfmtngs" + type: "integer" + description: "Travel conferences and meetings" + mode: "nullable" + - name: "printingpubl" + type: "integer" + description: "Printing and publications" + mode: "nullable" + - name: "topradmnexpnsa" + type: "integer" + description: "Total operating and administrative expenses column a" + mode: "nullable" + - name: "contrpdpbks" + type: "integer" + description: "Contributions gifts grants paid" + mode: "nullable" + - name: "totexpnspbks" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "excessrcpts" + type: "integer" + description: "Net income less deficit" + mode: "nullable" + - name: "totrcptnetinc" + type: "integer" + description: "Total receipts net investment income" + mode: "nullable" + - name: "topradmnexpnsb" + type: "integer" + description: "Total operating and administrative expenses column b" 
+ mode: "nullable" + - name: "totexpnsnetinc" + type: "integer" + description: "Total expenses net investment income" + mode: "nullable" + - name: "netinvstinc" + type: "integer" + description: "Net investment income" + mode: "nullable" + - name: "trcptadjnetinc" + type: "integer" + description: "Total receipts adjusted net income" + mode: "nullable" + - name: "totexpnsadjnet" + type: "integer" + description: "Total expenses adjusted net income" + mode: "nullable" + - name: "adjnetinc" + type: "integer" + description: "Adjusted net income" + mode: "nullable" + - name: "topradmnexpnsd" + type: "integer" + description: "Total operating and administrative expenses column d" + mode: "nullable" + - name: "totexpnsexempt" + type: "integer" + description: "Total expenses – exempt purpose" + mode: "nullable" + - name: "othrcashamt" + type: "integer" + description: "Cash non-interest-bearing – e-o-y book value" + mode: "nullable" + - name: "invstgovtoblig" + type: "integer" + description: "Investments in U.S. 
& state government obligations – e-o-y book value" + mode: "nullable" + - name: "invstcorpstk" + type: "integer" + description: "Investments in corporate stock – e-o-y book value" + mode: "nullable" + - name: "invstcorpbnd" + type: "integer" + description: "Investments in corporate bonds – e-o-y book value" + mode: "nullable" + - name: "totinvstsec" + type: "integer" + description: "Total investments in securities – e-o-y book value" + mode: "nullable" + - name: "mrtgloans" + type: "integer" + description: "Investments mortgage loans – e-o-y book value" + mode: "nullable" + - name: "othrinvstend" + type: "integer" + description: "Other investments – e-o-y book value" + mode: "nullable" + - name: "othrassetseoy" + type: "integer" + description: "Other assets – e-o-y book value" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets – e-o-y book value" + mode: "nullable" + - name: "mrtgnotespay" + type: "integer" + description: "Mortgage loans payable – e-o-y book value" + mode: "nullable" + - name: "othrliabltseoy" + type: "integer" + description: "Other liabilities – e-o-y book value" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities – e-o-y book value" + mode: "nullable" + - name: "tfundnworth" + type: "integer" + description: "Total fund net worth – e-o-y book value" + mode: "nullable" + - name: "fairmrktvaleoy" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "totexcapgnls" + type: "integer" + description: "Capital gain net income" + mode: "nullable" + - name: "totexcapgn" + type: "integer" + description: "Net gain – sales of assets" + mode: "nullable" + - name: "totexcapls" + type: "integer" + description: "Net loss – sales of assets" + mode: "nullable" + - name: "invstexcisetx" + type: "integer" + description: "Excise tax on net investment income" + mode: "nullable" + - name: "sec4940notxcd" + type: "string" + description: 
"Section 4940 – no tax" + mode: "nullable" + - name: "sec4940redtxcd" + type: "string" + description: "Section 4940 – 1 % tax" + mode: "nullable" + - name: "sect511tx" + type: "integer" + description: "Section 511 tax" + mode: "nullable" + - name: "subtitleatx" + type: "integer" + description: "Subtitle A tax" + mode: "nullable" + - name: "totaxpyr" + type: "integer" + description: "Total excise tax" + mode: "nullable" + - name: "esttaxcr" + type: "integer" + description: "Estimated tax credit" + mode: "nullable" + - name: "txwithldsrc" + type: "integer" + description: "Tax withheld at source" + mode: "nullable" + - name: "txpaidf2758" + type: "integer" + description: "Tax paid with Form 2758 (filing extension)" + mode: "nullable" + - name: "erronbkupwthld" + type: "integer" + description: "Erroneous backup withholding credit amount" + mode: "nullable" + - name: "estpnlty" + type: "integer" + description: "Estimated tax penalty" + mode: "nullable" + - name: "taxdue" + type: "integer" + description: "Tax due" + mode: "nullable" + - name: "overpay" + type: "integer" + description: "Overpayment" + mode: "nullable" + - name: "crelamt" + type: "integer" + description: "Credit elect amount" + mode: "nullable" + - name: "infleg" + type: "string" + description: "Influence legislation?" + mode: "nullable" + - name: "actnotpr" + type: "string" + description: "Activities not previously reported?" + mode: "nullable" + - name: "chgnprvrptcd" + type: "string" + description: "Changes not previously reported?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Filed 990-T?" + mode: "nullable" + - name: "contractncd" + type: "string" + description: "Contraction?" + mode: "nullable" + - name: "furnishcpycd" + type: "string" + description: "Furnished copy to Attorney General?" + mode: "nullable" + - name: "claimstatcd" + type: "string" + description: "Claiming status?" 
+ mode: "nullable" + - name: "cntrbtrstxyrcd" + type: "string" + description: "Substantial contributors?" + mode: "nullable" + - name: "acqdrindrintcd" + type: "string" + description: "Distribution to donor advised fund with advisory privileges?" + mode: "nullable" + - name: "orgcmplypubcd" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "filedlf1041ind" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "propexchcd" + type: "string" + description: "Property exchange?" + mode: "nullable" + - name: "brwlndmnycd" + type: "string" + description: "Borrow lend money?" + mode: "nullable" + - name: "furngoodscd" + type: "string" + description: "Furnished goods?" + mode: "nullable" + - name: "paidcmpncd" + type: "string" + description: "Paid compensation?" + mode: "nullable" + - name: "transfercd" + type: "string" + description: "Transfer?" + mode: "nullable" + - name: "agremkpaycd" + type: "string" + description: "Agree to make pay?" + mode: "nullable" + - name: "exceptactsind" + type: "string" + description: "Acts fail to qualify under section 53.4941(d)-3?" + mode: "nullable" + - name: "prioractvcd" + type: "string" + description: "Engage in acts in prior year?" + mode: "nullable" + - name: "undistrinccd" + type: "string" + description: "Undistributed income?" + mode: "nullable" + - name: "applyprovind" + type: "string" + description: "Not applying section 4942(a)(2) provisions?" + mode: "nullable" + - name: "dirindirintcd" + type: "string" + description: "Direct indirect interest?" + mode: "nullable" + - name: "excesshldcd" + type: "string" + description: "Excess business holdings?" + mode: "nullable" + - name: "invstjexmptcd" + type: "string" + description: "Jeopardizing investments?" + mode: "nullable" + - name: "prevjexmptcd" + type: "string" + description: "Prior year jeopardizing investments?" 
+ mode: "nullable" + - name: "propgndacd" + type: "string" + description: "Propaganda?" + mode: "nullable" + - name: "ipubelectcd" + type: "string" + description: "Influence public election?" + mode: "nullable" + - name: "grntindivcd" + type: "string" + description: "Grant individual?" + mode: "nullable" + - name: "nchrtygrntcd" + type: "string" + description: "Non-charity grant?" + mode: "nullable" + - name: "nreligiouscd" + type: "string" + description: "Non-religious?" + mode: "nullable" + - name: "excptransind" + type: "string" + description: "Transactions fail to qualify under section 53.4945?" + mode: "nullable" + - name: "rfprsnlbnftind" + type: "string" + description: "Receive funds to pay premiums on personal benefit contract?" + mode: "nullable" + - name: "pyprsnlbnftind" + type: "string" + description: "Pay premiums on personal benefit contract?" + mode: "nullable" + - name: "tfairmrktunuse" + type: "integer" + description: "Fair market value of assets not used for charitable purposes" + mode: "nullable" + - name: "valncharitassets" + type: "integer" + description: "Net value of noncharitable-use assets" + mode: "nullable" + - name: "cmpmininvstret" + type: "integer" + description: "Minimum investment return" + mode: "nullable" + - name: "distribamt" + type: "integer" + description: "Distributable amount" + mode: "nullable" + - name: "undistribincyr" + type: "integer" + description: "Undistributed income" + mode: "nullable" + - name: "adjnetinccola" + type: "integer" + description: "Adjusted net income column a" + mode: "nullable" + - name: "adjnetinccolb" + type: "integer" + description: "Adjusted net income column b" + mode: "nullable" + - name: "adjnetinccolc" + type: "integer" + description: "Adjusted net income column c" + mode: "nullable" + - name: "adjnetinccold" + type: "integer" + description: "Adjusted net income column d" + mode: "nullable" + - name: "adjnetinctot" + type: "integer" + description: "Adjusted net income total" + mode: "nullable" 
+ - name: "qlfydistriba" + type: "integer" + description: "Qualifying distributions column a" + mode: "nullable" + - name: "qlfydistribb" + type: "integer" + description: "Qualifying distributions column b" + mode: "nullable" + - name: "qlfydistribc" + type: "integer" + description: "Qualifying distributions column c" + mode: "nullable" + - name: "qlfydistribd" + type: "integer" + description: "Qualifying distributions column d" + mode: "nullable" + - name: "qlfydistribtot" + type: "integer" + description: "Qualifying distributions total" + mode: "nullable" + - name: "valassetscola" + type: "integer" + description: "Value assets column a" + mode: "nullable" + - name: "valassetscolb" + type: "integer" + description: "Value assets column b" + mode: "nullable" + - name: "valassetscolc" + type: "integer" + description: "Value assets column c" + mode: "nullable" + - name: "valassetscold" + type: "integer" + description: "Value assets column d" + mode: "nullable" + - name: "valassetstot" + type: "integer" + description: "Value assets total" + mode: "nullable" + - name: "qlfyasseta" + type: "integer" + description: "Qualifying assets column a" + mode: "nullable" + - name: "qlfyassetb" + type: "integer" + description: "Qualifying assets column b" + mode: "nullable" + - name: "qlfyassetc" + type: "integer" + description: "Qualifying assets column c" + mode: "nullable" + - name: "qlfyassetd" + type: "integer" + description: "Qualifying assets column d" + mode: "nullable" + - name: "qlfyassettot" + type: "integer" + description: "Qualifying assets total" + mode: "nullable" + - name: "endwmntscola" + type: "integer" + description: "Endowments column a" + mode: "nullable" + - name: "endwmntscolb" + type: "integer" + description: "Endowments column b" + mode: "nullable" + - name: "endwmntscolc" + type: "integer" + description: "Endowments column c" + mode: "nullable" + - name: "endwmntscold" + type: "integer" + description: "Endowments column d" + mode: "nullable" + - name: 
"endwmntstot" + type: "integer" + description: "Endowments total" + mode: "nullable" + - name: "totsuprtcola" + type: "integer" + description: "Total support column a" + mode: "nullable" + - name: "totsuprtcolb" + type: "integer" + description: "Total support column b" + mode: "nullable" + - name: "totsuprtcolc" + type: "integer" + description: "Total support column c" + mode: "nullable" + - name: "totsuprtcold" + type: "integer" + description: "Total support column d" + mode: "nullable" + - name: "totsuprttot" + type: "integer" + description: "Total support total" + mode: "nullable" + - name: "pubsuprtcola" + type: "integer" + description: "Public support column a" + mode: "nullable" + - name: "pubsuprtcolb" + type: "integer" + description: "Public support column b" + mode: "nullable" + - name: "pubsuprtcolc" + type: "integer" + description: "Public support column c" + mode: "nullable" + - name: "pubsuprtcold" + type: "integer" + description: "Public support column d" + mode: "nullable" + - name: "pubsuprttot" + type: "integer" + description: "Public support total" + mode: "nullable" + - name: "grsinvstinca" + type: "integer" + description: "Gross investment income column a" + mode: "nullable" + - name: "grsinvstincb" + type: "integer" + description: "Gross investment income column b" + mode: "nullable" + - name: "grsinvstincc" + type: "integer" + description: "Gross investment income column c" + mode: "nullable" + - name: "grsinvstincd" + type: "integer" + description: "Gross investment income column d" + mode: "nullable" + - name: "grsinvstinctot" + type: "integer" + description: "Gross investment income total" + mode: "nullable" + - name: "grntapprvfut" + type: "integer" + description: "Grants approved for future payment" + mode: "nullable" + - name: "progsrvcacold" + type: "integer" + description: "Program service revenue line 1a (excluded)" + mode: "nullable" + - name: "progsrvcacole" + type: "integer" + description: "Program service revenue line 1a (exempt)" 
+ mode: "nullable" + - name: "progsrvcbcold" + type: "integer" + description: "Program service revenue line 1b (excluded)" + mode: "nullable" + - name: "progsrvcbcole" + type: "integer" + description: "Program service revenue line 1b (exempt)" + mode: "nullable" + - name: "progsrvcccold" + type: "integer" + description: "Program service revenue line 1c (excluded)" + mode: "nullable" + - name: "progsrvcccole" + type: "integer" + description: "Program service revenue line 1c (exempt)" + mode: "nullable" + - name: "progsrvcdcold" + type: "integer" + description: "Program service revenue line 1d (excluded)" + mode: "nullable" + - name: "progsrvcdcole" + type: "integer" + description: "Program service revenue line 1d (exempt)" + mode: "nullable" + - name: "progsrvcecold" + type: "integer" + description: "Program service revenue line 1e (excluded)" + mode: "nullable" + - name: "progsrvcecole" + type: "integer" + description: "Program service revenue line 1e (exempt)" + mode: "nullable" + - name: "progsrvcfcold" + type: "integer" + description: "Program service revenue line 1f (excluded)" + mode: "nullable" + - name: "progsrvcfcole" + type: "integer" + description: "Program service revenue line 1f (exempt)" + mode: "nullable" + - name: "progsrvcgcold" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (excluded)" + mode: "nullable" + - name: "progsrvcgcole" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (exempt)" + mode: "nullable" + - name: "membershpduesd" + type: "integer" + description: "Membership dues and assessments (excluded)" + mode: "nullable" + - name: "membershpduese" + type: "integer" + description: "Membership dues and assessments (exempt)" + mode: "nullable" + - name: "intonsvngsd" + type: "integer" + description: "Interest on savings and temporary cash investments (excluded)" + mode: "nullable" + - name: "intonsvngse" + type: "integer" + description: 
"Interest on savings and temporary cash investments (exempt)" + mode: "nullable" + - name: "dvdndsintd" + type: "integer" + description: "Dividends and interest from securities (excluded)" + mode: "nullable" + - name: "dvdndsinte" + type: "integer" + description: "Dividends and interest from securities (exempt)" + mode: "nullable" + - name: "trnsfrcashcd" + type: "string" + description: "Transfer cash to noncharitable exempt organization?" + mode: "nullable" + - name: "trnsothasstscd" + type: "string" + description: "Transfer other assets to noncharitable exempt organization?" + mode: "nullable" + - name: "salesasstscd" + type: "string" + description: "Sale of assets to noncharitable exempt organization?" + mode: "nullable" + - name: "prchsasstscd" + type: "string" + description: "Purchase of assets from noncharitable exempt organization?" + mode: "nullable" + - name: "rentlsfacltscd" + type: "string" + description: "Rental of facilities or other assets?" + mode: "nullable" + - name: "reimbrsmntscd" + type: "string" + description: "Reimbursements arrangements?" + mode: "nullable" + - name: "loansguarcd" + type: "string" + description: "Loans or other guarantees?" + mode: "nullable" + - name: "perfservicescd" + type: "string" + description: "Performance of services or membership or fundraising solicitations?" + mode: "nullable" + - name: "sharngasstscd" + type: "string" + description: "Sharing of facilities equipment mailing lists other assets or paid employees?" 
+ mode: "nullable" + + + graph_paths: + - "irs_990_pf_2014_transform_csv >> load_irs_990_pf_2014_to_bq" diff --git a/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py b/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py new file mode 100644 index 000000000..1606116c3 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py @@ -0,0 +1,1143 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_pf_2015", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_pf_2015_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_pf_2015_transform_csv", + startup_timeout_seconds=600, + name="irs_990_pf_2015", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextract990pf.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_pf_2015/data_output.csv", + 
"PIPELINE_NAME": "irs_990_pf_2015", + "CSV_HEADERS": '["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprt
cola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"]', + "RENAME_MAPPINGS": '{"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": 
"invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": 
"undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": 
"trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_pf_2015_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_pf_2015_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_pf_2015/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_pf_2015", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_prd", + "type": "string", + "description": "Tax period (YYYYMM format)", + "mode": "nullable", + }, + { + "name": "eostatus", + "type": "string", + "description": "EO Status Code", + "mode": "nullable", + }, + { + "name": "tax_yr", + "type": "integer", + "description": "SOI Year", + "mode": "nullable", + }, + { + "name": "operatingcd", + "type": "string", + "description": "Operating foundation code", + "mode": "nullable", + }, + { + "name": "subcd", + "type": "string", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "fairmrktvalamt", + "type": "integer", + "description": "Total assets – e-o-y fair market valu", + "mode": "nullable", + }, + { + "name": "grscontrgifts", + "type": "integer", + "description": "Contributions received", + "mode": "nullable", + }, + { + "name": "schedbind", + "type": "string", + "description": "Schedule B indicator", + "mode": "nullable", + }, + { + "name": "intrstrvnue", + "type": "integer", + "description": "Interest 
revenue", + "mode": "nullable", + }, + { + "name": "dividndsamt", + "type": "integer", + "description": "", + "mode": "nullable", + }, + { + "name": "grsrents", + "type": "integer", + "description": "Gross rents", + "mode": "nullable", + }, + { + "name": "grsslspramt", + "type": "integer", + "description": "Gross sales price for assets", + "mode": "nullable", + }, + { + "name": "costsold", + "type": "integer", + "description": "Cost-of-goods-sold", + "mode": "nullable", + }, + { + "name": "grsprofitbus", + "type": "integer", + "description": "Gross profit", + "mode": "nullable", + }, + { + "name": "otherincamt", + "type": "integer", + "description": "Other income", + "mode": "nullable", + }, + { + "name": "totrcptperbks", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "compofficers", + "type": "integer", + "description": "Compensation of officers", + "mode": "nullable", + }, + { + "name": "pensplemplbenf", + "type": "integer", + "description": "Pension plans employee benefits", + "mode": "nullable", + }, + { + "name": "legalfeesamt", + "type": "integer", + "description": "Legal fees", + "mode": "nullable", + }, + { + "name": "accountingfees", + "type": "integer", + "description": "Accounting fees", + "mode": "nullable", + }, + { + "name": "interestamt", + "type": "integer", + "description": "Interest", + "mode": "nullable", + }, + { + "name": "depreciationamt", + "type": "integer", + "description": "Depreciation and depletion", + "mode": "nullable", + }, + { + "name": "occupancyamt", + "type": "integer", + "description": "Occupancy", + "mode": "nullable", + }, + { + "name": "travlconfmtngs", + "type": "integer", + "description": "Travel conferences and meetings", + "mode": "nullable", + }, + { + "name": "printingpubl", + "type": "integer", + "description": "Printing and publications", + "mode": "nullable", + }, + { + "name": "topradmnexpnsa", + "type": "integer", + "description": "Total operating and administrative 
expenses column a", + "mode": "nullable", + }, + { + "name": "contrpdpbks", + "type": "integer", + "description": "Contributions gifts grants paid", + "mode": "nullable", + }, + { + "name": "totexpnspbks", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "excessrcpts", + "type": "integer", + "description": "Net income less deficit", + "mode": "nullable", + }, + { + "name": "totrcptnetinc", + "type": "integer", + "description": "Total receipts net investment income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsb", + "type": "integer", + "description": "Total operating and administrative expenses column b", + "mode": "nullable", + }, + { + "name": "totexpnsnetinc", + "type": "integer", + "description": "Total expenses net investment income", + "mode": "nullable", + }, + { + "name": "netinvstinc", + "type": "integer", + "description": "Net investment income", + "mode": "nullable", + }, + { + "name": "trcptadjnetinc", + "type": "integer", + "description": "Total receipts adjusted net income", + "mode": "nullable", + }, + { + "name": "totexpnsadjnet", + "type": "integer", + "description": "Total expenses adjusted net income", + "mode": "nullable", + }, + { + "name": "adjnetinc", + "type": "integer", + "description": "Adjusted net income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsd", + "type": "integer", + "description": "Total operating and administrative expenses column d", + "mode": "nullable", + }, + { + "name": "totexpnsexempt", + "type": "integer", + "description": "Total expenses – exempt purpose", + "mode": "nullable", + }, + { + "name": "othrcashamt", + "type": "integer", + "description": "Cash non-interest-bearing – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstgovtoblig", + "type": "integer", + "description": "Investments in U.S. 
& state government obligations – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpstk", + "type": "integer", + "description": "Investments in corporate stock – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpbnd", + "type": "integer", + "description": "Investments in corporate bonds – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totinvstsec", + "type": "integer", + "description": "Total investments in securities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgloans", + "type": "integer", + "description": "Investments mortgage loans – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrinvstend", + "type": "integer", + "description": "Other investments – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrassetseoy", + "type": "integer", + "description": "Other assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgnotespay", + "type": "integer", + "description": "Mortgage loans payable – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrliabltseoy", + "type": "integer", + "description": "Other liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "tfundnworth", + "type": "integer", + "description": "Total fund net worth – e-o-y book value", + "mode": "nullable", + }, + { + "name": "fairmrktvaleoy", + "type": "integer", + "description": "Total assets – e-o-y fair market value", + "mode": "nullable", + }, + { + "name": "totexcapgnls", + "type": "integer", + "description": "Capital gain net income", + "mode": "nullable", + }, + { + "name": "totexcapgn", + "type": "integer", + "description": "Net gain – sales of assets", + "mode": "nullable", + }, + { + 
"name": "totexcapls", + "type": "integer", + "description": "Net loss – sales of assets", + "mode": "nullable", + }, + { + "name": "invstexcisetx", + "type": "integer", + "description": "Excise tax on net investment income", + "mode": "nullable", + }, + { + "name": "sec4940notxcd", + "type": "string", + "description": "Section 4940 – no tax", + "mode": "nullable", + }, + { + "name": "sec4940redtxcd", + "type": "string", + "description": "Section 4940 – 1 % tax", + "mode": "nullable", + }, + { + "name": "sect511tx", + "type": "integer", + "description": "Section 511 tax", + "mode": "nullable", + }, + { + "name": "subtitleatx", + "type": "integer", + "description": "Subtitle A tax", + "mode": "nullable", + }, + { + "name": "totaxpyr", + "type": "integer", + "description": "Total excise tax", + "mode": "nullable", + }, + { + "name": "esttaxcr", + "type": "integer", + "description": "Estimated tax credit", + "mode": "nullable", + }, + { + "name": "txwithldsrc", + "type": "integer", + "description": "Tax withheld at source", + "mode": "nullable", + }, + { + "name": "txpaidf2758", + "type": "integer", + "description": "Tax paid with Form 2758 (filing extension)", + "mode": "nullable", + }, + { + "name": "erronbkupwthld", + "type": "integer", + "description": "Erroneous backup withholding credit amount", + "mode": "nullable", + }, + { + "name": "estpnlty", + "type": "integer", + "description": "Estimated tax penalty", + "mode": "nullable", + }, + { + "name": "taxdue", + "type": "integer", + "description": "Tax due", + "mode": "nullable", + }, + { + "name": "overpay", + "type": "integer", + "description": "Overpayment", + "mode": "nullable", + }, + { + "name": "crelamt", + "type": "integer", + "description": "Credit elect amount", + "mode": "nullable", + }, + { + "name": "infleg", + "type": "string", + "description": "Influence legislation?", + "mode": "nullable", + }, + { + "name": "actnotpr", + "type": "string", + "description": "Activities not previously reported?", + 
"mode": "nullable", + }, + { + "name": "chgnprvrptcd", + "type": "string", + "description": "Changes not previously reported?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Filed 990-T?", + "mode": "nullable", + }, + { + "name": "contractncd", + "type": "string", + "description": "Contraction?", + "mode": "nullable", + }, + { + "name": "furnishcpycd", + "type": "string", + "description": "Furnished copy to Attorney General?", + "mode": "nullable", + }, + { + "name": "claimstatcd", + "type": "string", + "description": "Claiming status?", + "mode": "nullable", + }, + { + "name": "cntrbtrstxyrcd", + "type": "string", + "description": "Substantial contributors?", + "mode": "nullable", + }, + { + "name": "distribdafcd", + "type": "string", + "description": "Distribution to donor advised fund with advisory privileges?", + "mode": "nullable", + }, + { + "name": "orgcmplypubcd", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "filedlf1041ind", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "propexchcd", + "type": "string", + "description": "Property exchange?", + "mode": "nullable", + }, + { + "name": "brwlndmnycd", + "type": "string", + "description": "Borrow lend money?", + "mode": "nullable", + }, + { + "name": "furngoodscd", + "type": "string", + "description": "Furnished goods?", + "mode": "nullable", + }, + { + "name": "paidcmpncd", + "type": "string", + "description": "Paid compensation?", + "mode": "nullable", + }, + { + "name": "transfercd", + "type": "string", + "description": "Transfer?", + "mode": "nullable", + }, + { + "name": "agremkpaycd", + "type": "string", + "description": "Agree to make pay?", + "mode": "nullable", + }, + { + "name": "exceptactsind", + "type": "string", + "description": "Acts fail to qualify under section 53.4941(d)-3?", + "mode": "nullable", + }, + { + "name": 
"prioractvcd", + "type": "string", + "description": "Engage in acts in prior year?", + "mode": "nullable", + }, + { + "name": "undistrinccd", + "type": "string", + "description": "Undistributed income?", + "mode": "nullable", + }, + { + "name": "applyprovind", + "type": "string", + "description": "Not applying section 4942(a)(2) provisions?", + "mode": "nullable", + }, + { + "name": "dirindirintcd", + "type": "string", + "description": "Direct indirect interest?", + "mode": "nullable", + }, + { + "name": "excesshldcd", + "type": "string", + "description": "Excess business holdings?", + "mode": "nullable", + }, + { + "name": "invstjexmptcd", + "type": "string", + "description": "Jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "prevjexmptcd", + "type": "string", + "description": "Prior year jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "propgndacd", + "type": "string", + "description": "Propaganda?", + "mode": "nullable", + }, + { + "name": "ipubelectcd", + "type": "string", + "description": "Influence public election?", + "mode": "nullable", + }, + { + "name": "grntindivcd", + "type": "string", + "description": "Grant individual?", + "mode": "nullable", + }, + { + "name": "nchrtygrntcd", + "type": "string", + "description": "Non-charity grant?", + "mode": "nullable", + }, + { + "name": "nreligiouscd", + "type": "string", + "description": "Non-religious?", + "mode": "nullable", + }, + { + "name": "excptransind", + "type": "string", + "description": "Transactions fail to qualify under section 53.4945?", + "mode": "nullable", + }, + { + "name": "rfprsnlbnftind", + "type": "string", + "description": "Receive funds to pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "pyprsnlbnftind", + "type": "string", + "description": "Pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "tfairmrktunuse", + "type": "integer", + "description": "Fair market value of assets 
not used for charitable purposes", + "mode": "nullable", + }, + { + "name": "valncharitassets", + "type": "integer", + "description": "Net value of noncharitable-use assets", + "mode": "nullable", + }, + { + "name": "cmpmininvstret", + "type": "integer", + "description": "Minimum investment return", + "mode": "nullable", + }, + { + "name": "distribamt", + "type": "integer", + "description": "Distributable amount", + "mode": "nullable", + }, + { + "name": "undistribincyr", + "type": "integer", + "description": "Undistributed income", + "mode": "nullable", + }, + { + "name": "adjnetinccola", + "type": "integer", + "description": "Adjusted net income column a", + "mode": "nullable", + }, + { + "name": "adjnetinccolb", + "type": "integer", + "description": "Adjusted net income column b", + "mode": "nullable", + }, + { + "name": "adjnetinccolc", + "type": "integer", + "description": "Adjusted net income column c", + "mode": "nullable", + }, + { + "name": "adjnetinccold", + "type": "integer", + "description": "Adjusted net income column d", + "mode": "nullable", + }, + { + "name": "adjnetinctot", + "type": "integer", + "description": "Adjusted net income total", + "mode": "nullable", + }, + { + "name": "qlfydistriba", + "type": "integer", + "description": "Qualifying distributions column a", + "mode": "nullable", + }, + { + "name": "qlfydistribb", + "type": "integer", + "description": "Qualifying distributions column b", + "mode": "nullable", + }, + { + "name": "qlfydistribc", + "type": "integer", + "description": "Qualifying distributions column c", + "mode": "nullable", + }, + { + "name": "qlfydistribd", + "type": "integer", + "description": "Qualifying distributions column d", + "mode": "nullable", + }, + { + "name": "qlfydistribtot", + "type": "integer", + "description": "Qualifying distributions total", + "mode": "nullable", + }, + { + "name": "valassetscola", + "type": "integer", + "description": "Value assets column a", + "mode": "nullable", + }, + { + "name": 
"valassetscolb", + "type": "integer", + "description": "Value assets column b", + "mode": "nullable", + }, + { + "name": "valassetscolc", + "type": "integer", + "description": "Value assets column c", + "mode": "nullable", + }, + { + "name": "valassetscold", + "type": "integer", + "description": "Value assets column d", + "mode": "nullable", + }, + { + "name": "valassetstot", + "type": "integer", + "description": "Value assets total", + "mode": "nullable", + }, + { + "name": "qlfyasseta", + "type": "integer", + "description": "Qualifying assets column a", + "mode": "nullable", + }, + { + "name": "qlfyassetb", + "type": "integer", + "description": "Qualifying assets column b", + "mode": "nullable", + }, + { + "name": "qlfyassetc", + "type": "integer", + "description": "Qualifying assets column c", + "mode": "nullable", + }, + { + "name": "qlfyassetd", + "type": "integer", + "description": "Qualifying assets column d", + "mode": "nullable", + }, + { + "name": "qlfyassettot", + "type": "integer", + "description": "Qualifying assets total", + "mode": "nullable", + }, + { + "name": "endwmntscola", + "type": "integer", + "description": "Endowments column a", + "mode": "nullable", + }, + { + "name": "endwmntscolb", + "type": "integer", + "description": "Endowments column b", + "mode": "nullable", + }, + { + "name": "endwmntscolc", + "type": "integer", + "description": "Endowments column c", + "mode": "nullable", + }, + { + "name": "endwmntscold", + "type": "integer", + "description": "Endowments column d", + "mode": "nullable", + }, + { + "name": "endwmntstot", + "type": "integer", + "description": "Endowments total", + "mode": "nullable", + }, + { + "name": "totsuprtcola", + "type": "integer", + "description": "Total support column a", + "mode": "nullable", + }, + { + "name": "totsuprtcolb", + "type": "integer", + "description": "Total support column b", + "mode": "nullable", + }, + { + "name": "totsuprtcolc", + "type": "integer", + "description": "Total support column 
c", + "mode": "nullable", + }, + { + "name": "totsuprtcold", + "type": "integer", + "description": "Total support column d", + "mode": "nullable", + }, + { + "name": "totsuprttot", + "type": "integer", + "description": "Total support total", + "mode": "nullable", + }, + { + "name": "pubsuprtcola", + "type": "integer", + "description": "Public support column a", + "mode": "nullable", + }, + { + "name": "pubsuprtcolb", + "type": "integer", + "description": "Public support column b", + "mode": "nullable", + }, + { + "name": "pubsuprtcolc", + "type": "integer", + "description": "Public support column c", + "mode": "nullable", + }, + { + "name": "pubsuprtcold", + "type": "integer", + "description": "Public support column d", + "mode": "nullable", + }, + { + "name": "pubsuprttot", + "type": "integer", + "description": "Public support total", + "mode": "nullable", + }, + { + "name": "grsinvstinca", + "type": "integer", + "description": "Gross investment income column a", + "mode": "nullable", + }, + { + "name": "grsinvstincb", + "type": "integer", + "description": "Gross investment income column b", + "mode": "nullable", + }, + { + "name": "grsinvstincc", + "type": "integer", + "description": "Gross investment income column c", + "mode": "nullable", + }, + { + "name": "grsinvstincd", + "type": "integer", + "description": "Gross investment income column d", + "mode": "nullable", + }, + { + "name": "grsinvstinctot", + "type": "integer", + "description": "Gross investment income total", + "mode": "nullable", + }, + { + "name": "grntapprvfut", + "type": "integer", + "description": "Grants approved for future payment", + "mode": "nullable", + }, + { + "name": "progsrvcacold", + "type": "integer", + "description": "Program service revenue line 1a (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcacole", + "type": "integer", + "description": "Program service revenue line 1a (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcbcold", + "type": "integer", + 
"description": "Program service revenue line 1b (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcbcole", + "type": "integer", + "description": "Program service revenue line 1b (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcccold", + "type": "integer", + "description": "Program service revenue line 1c (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcccole", + "type": "integer", + "description": "Program service revenue line 1c (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcdcold", + "type": "integer", + "description": "Program service revenue line 1d (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcdcole", + "type": "integer", + "description": "Program service revenue line 1d (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcecold", + "type": "integer", + "description": "Program service revenue line 1e (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcecole", + "type": "integer", + "description": "Program service revenue line 1e (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcfcold", + "type": "integer", + "description": "Program service revenue line 1f (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcfcole", + "type": "integer", + "description": "Program service revenue line 1f (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcgcold", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcgcole", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (exempt)", + "mode": "nullable", + }, + { + "name": "membershpduesd", + "type": "integer", + "description": "Membership dues and assessments (excluded)", + "mode": "nullable", + }, + { + "name": "membershpduese", + "type": "integer", + "description": "Membership dues and assessments (exempt)", + "mode": "nullable", + }, + { + 
"name": "intonsvngsd", + "type": "integer", + "description": "Interest on savings and temporary cash investments (excluded)", + "mode": "nullable", + }, + { + "name": "intonsvngse", + "type": "integer", + "description": "Interest on savings and temporary cash investments (exempt)", + "mode": "nullable", + }, + { + "name": "dvdndsintd", + "type": "integer", + "description": "Dividends and interest from securities (excluded)", + "mode": "nullable", + }, + { + "name": "dvdndsinte", + "type": "integer", + "description": "Dividends and interest from securities (exempt)", + "mode": "nullable", + }, + { + "name": "trnsfrcashcd", + "type": "string", + "description": "Transfer cash to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "trnsothasstscd", + "type": "string", + "description": "Transfer other assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "salesasstscd", + "type": "string", + "description": "Sale of assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "prchsasstscd", + "type": "string", + "description": "Purchase of assets from noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "rentlsfacltscd", + "type": "string", + "description": "Rental of facilities or other assets?", + "mode": "nullable", + }, + { + "name": "reimbrsmntscd", + "type": "string", + "description": "Reimbursements arrangements?", + "mode": "nullable", + }, + { + "name": "loansguarcd", + "type": "string", + "description": "Loans or other guarantees?", + "mode": "nullable", + }, + { + "name": "perfservicescd", + "type": "string", + "description": "Performance of services or membership or fundraising solicitations?", + "mode": "nullable", + }, + { + "name": "sharngasstscd", + "type": "string", + "description": "Sharing of facilities equipment mailing lists other assets or paid employees?", + "mode": "nullable", + }, + ], + ) + + irs_990_pf_2015_transform_csv >> 
load_irs_990_pf_2015_to_bq diff --git a/datasets/irs_990/irs_990_pf_2015/pipeline.yaml b/datasets/irs_990/irs_990_pf_2015/pipeline.yaml new file mode 100644 index 000000000..82e40120b --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2015/pipeline.yaml @@ -0,0 +1,827 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_pf_2015 + + # Description of the table + description: "IRS 990 PF 2015 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_pf_2015 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_pf_2015_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_pf_2015" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. 
use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/15eofinextract990pf.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_pf_2015/data_output.csv" + PIPELINE_NAME: "irs_990_pf_2015" + CSV_HEADERS: >- + ["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd",
"distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"] + RENAME_MAPPINGS: >- + {"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": 
"otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": 
"orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": 
"grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"} + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_pf_2015_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_pf_2015/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_pf_2015" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_prd" + type: "string" + description: "Tax period (YYYYMM format)" + mode: "nullable" + - name: "eostatus" + type: "string" + description: "EO Status Code" + mode: "nullable" + - name: "tax_yr" + type: "integer" + description: "SOI Year" + mode: "nullable" + - name: "operatingcd" + type: "string" + description: "Operating foundation code" + mode: "nullable" + - name: "subcd" + type: "string" + description: "Subsection code" + mode: "nullable" + - name: "fairmrktvalamt" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "grscontrgifts" + type: "integer" + description: "Contributions received" + mode: "nullable" + - name: "schedbind" + type: "string" + description: "Schedule B indicator" + mode: "nullable" + - name: "intrstrvnue" + type: "integer" + description: "Interest revenue" + mode: "nullable" + - name: "dividndsamt" + type: "integer" + description: "" + mode: "nullable" + - name: "grsrents" + type: "integer" + description: "Gross 
rents" + mode: "nullable" + - name: "grsslspramt" + type: "integer" + description: "Gross sales price for assets" + mode: "nullable" + - name: "costsold" + type: "integer" + description: "Cost-of-goods-sold" + mode: "nullable" + - name: "grsprofitbus" + type: "integer" + description: "Gross profit" + mode: "nullable" + - name: "otherincamt" + type: "integer" + description: "Other income" + mode: "nullable" + - name: "totrcptperbks" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "compofficers" + type: "integer" + description: "Compensation of officers" + mode: "nullable" + - name: "pensplemplbenf" + type: "integer" + description: "Pension plans employee benefits" + mode: "nullable" + - name: "legalfeesamt" + type: "integer" + description: "Legal fees" + mode: "nullable" + - name: "accountingfees" + type: "integer" + description: "Accounting fees" + mode: "nullable" + - name: "interestamt" + type: "integer" + description: "Interest" + mode: "nullable" + - name: "depreciationamt" + type: "integer" + description: "Depreciation and depletion" + mode: "nullable" + - name: "occupancyamt" + type: "integer" + description: "Occupancy" + mode: "nullable" + - name: "travlconfmtngs" + type: "integer" + description: "Travel conferences and meetings" + mode: "nullable" + - name: "printingpubl" + type: "integer" + description: "Printing and publications" + mode: "nullable" + - name: "topradmnexpnsa" + type: "integer" + description: "Total operating and administrative expenses column a" + mode: "nullable" + - name: "contrpdpbks" + type: "integer" + description: "Contributions gifts grants paid" + mode: "nullable" + - name: "totexpnspbks" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "excessrcpts" + type: "integer" + description: "Net income less deficit" + mode: "nullable" + - name: "totrcptnetinc" + type: "integer" + description: "Total receipts net investment income" + mode: "nullable" + - name: "topradmnexpnsb" + 
type: "integer" + description: "Total operating and administrative expenses column b" + mode: "nullable" + - name: "totexpnsnetinc" + type: "integer" + description: "Total expenses net investment income" + mode: "nullable" + - name: "netinvstinc" + type: "integer" + description: "Net investment income" + mode: "nullable" + - name: "trcptadjnetinc" + type: "integer" + description: "Total receipts adjusted net income" + mode: "nullable" + - name: "totexpnsadjnet" + type: "integer" + description: "Total expenses adjusted net income" + mode: "nullable" + - name: "adjnetinc" + type: "integer" + description: "Adjusted net income" + mode: "nullable" + - name: "topradmnexpnsd" + type: "integer" + description: "Total operating and administrative expenses column d" + mode: "nullable" + - name: "totexpnsexempt" + type: "integer" + description: "Total expenses – exempt purpose" + mode: "nullable" + - name: "othrcashamt" + type: "integer" + description: "Cash non-interest-bearing – e-o-y book value" + mode: "nullable" + - name: "invstgovtoblig" + type: "integer" + description: "Investments in U.S. 
& state government obligations – e-o-y book value" + mode: "nullable" + - name: "invstcorpstk" + type: "integer" + description: "Investments in corporate stock – e-o-y book value" + mode: "nullable" + - name: "invstcorpbnd" + type: "integer" + description: "Investments in corporate bonds– e-o-y book value" + mode: "nullable" + - name: "totinvstsec" + type: "integer" + description: "Total investments in securities – e-o-y book value" + mode: "nullable" + - name: "mrtgloans" + type: "integer" + description: "Investments mortgage loans – e-o-y book value" + mode: "nullable" + - name: "othrinvstend" + type: "integer" + description: "Other investments – e-o-y book value" + mode: "nullable" + - name: "othrassetseoy" + type: "integer" + description: "Other assets – e-o-y book value" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets – e-o-y book value" + mode: "nullable" + - name: "mrtgnotespay" + type: "integer" + description: "Mortgage loans payable – e-o-y book value" + mode: "nullable" + - name: "othrliabltseoy" + type: "integer" + description: "Other liabilities – e-o-y book value" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities – e-o-y book value" + mode: "nullable" + - name: "tfundnworth" + type: "integer" + description: "Total fund net worth – e-o-y book value" + mode: "nullable" + - name: "fairmrktvaleoy" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "totexcapgnls" + type: "integer" + description: "Capital gain net income" + mode: "nullable" + - name: "totexcapgn" + type: "integer" + description: "Net gain – sales of assets" + mode: "nullable" + - name: "totexcapls" + type: "integer" + description: "Net loss – sales of assets" + mode: "nullable" + - name: "invstexcisetx" + type: "integer" + description: "Excise tax on net investment income" + mode: "nullable" + - name: "sec4940notxcd" + type: "string" + description: 
"Section 4940 – no tax" + mode: "nullable" + - name: "sec4940redtxcd" + type: "string" + description: "Section 4940 – 1 % tax" + mode: "nullable" + - name: "sect511tx" + type: "integer" + description: "Section 511 tax" + mode: "nullable" + - name: "subtitleatx" + type: "integer" + description: "Subtitle A tax" + mode: "nullable" + - name: "totaxpyr" + type: "integer" + description: "Total excise tax" + mode: "nullable" + - name: "esttaxcr" + type: "integer" + description: "Estimated tax credit" + mode: "nullable" + - name: "txwithldsrc" + type: "integer" + description: "Tax withheld at source" + mode: "nullable" + - name: "txpaidf2758" + type: "integer" + description: "Tax paid with Form 2758 (filing extension)" + mode: "nullable" + - name: "erronbkupwthld" + type: "integer" + description: "Erroneous backup withholding credit amount" + mode: "nullable" + - name: "estpnlty" + type: "integer" + description: "Estimated tax penalty" + mode: "nullable" + - name: "taxdue" + type: "integer" + description: "Tax due" + mode: "nullable" + - name: "overpay" + type: "integer" + description: "Overpayment" + mode: "nullable" + - name: "crelamt" + type: "integer" + description: "Credit elect amount" + mode: "nullable" + - name: "infleg" + type: "string" + description: "Influence legislation?" + mode: "nullable" + - name: "actnotpr" + type: "string" + description: "Activities not previously reported?" + mode: "nullable" + - name: "chgnprvrptcd" + type: "string" + description: "Changes not previously reported?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Filed 990-T?" + mode: "nullable" + - name: "contractncd" + type: "string" + description: "Contraction?" + mode: "nullable" + - name: "furnishcpycd" + type: "string" + description: "Furnished copy to Attorney General?" + mode: "nullable" + - name: "claimstatcd" + type: "string" + description: "Claiming status?" 
+ mode: "nullable" + - name: "cntrbtrstxyrcd" + type: "string" + description: "Substantial contributors?" + mode: "nullable" + - name: "distribdafcd" + type: "string" + description: "Distribution to donor advised fund with advisory privileges?" + mode: "nullable" + - name: "orgcmplypubcd" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "filedlf1041ind" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "propexchcd" + type: "string" + description: "Property exchange?" + mode: "nullable" + - name: "brwlndmnycd" + type: "string" + description: "Borrow lend money?" + mode: "nullable" + - name: "furngoodscd" + type: "string" + description: "Furnished goods?" + mode: "nullable" + - name: "paidcmpncd" + type: "string" + description: "Paid compensation?" + mode: "nullable" + - name: "transfercd" + type: "string" + description: "Transfer?" + mode: "nullable" + - name: "agremkpaycd" + type: "string" + description: "Agree to make pay?" + mode: "nullable" + - name: "exceptactsind" + type: "string" + description: "Acts fail to qualify under section 53.4941(d)-3?" + mode: "nullable" + - name: "prioractvcd" + type: "string" + description: "Engage in acts in prior year?" + mode: "nullable" + - name: "undistrinccd" + type: "string" + description: "Undistributed income?" + mode: "nullable" + - name: "applyprovind" + type: "string" + description: "Not applying section 4942(a)(2) provisions?" + mode: "nullable" + - name: "dirindirintcd" + type: "string" + description: "Direct indirect interest?" + mode: "nullable" + - name: "excesshldcd" + type: "string" + description: "Excess business holdings?" + mode: "nullable" + - name: "invstjexmptcd" + type: "string" + description: "Jeopardizing investments?" + mode: "nullable" + - name: "prevjexmptcd" + type: "string" + description: "Prior year jeopardizing investments?" 
+ mode: "nullable" + - name: "propgndacd" + type: "string" + description: "Propaganda?" + mode: "nullable" + - name: "ipubelectcd" + type: "string" + description: "Influence public election?" + mode: "nullable" + - name: "grntindivcd" + type: "string" + description: "Grant individual?" + mode: "nullable" + - name: "nchrtygrntcd" + type: "string" + description: "Non-charity grant?" + mode: "nullable" + - name: "nreligiouscd" + type: "string" + description: "Non-religious?" + mode: "nullable" + - name: "excptransind" + type: "string" + description: "Transactions fail to qualify under section 53.4945?" + mode: "nullable" + - name: "rfprsnlbnftind" + type: "string" + description: "Receive funds to pay premiums on personal benefit contract?" + mode: "nullable" + - name: "pyprsnlbnftind" + type: "string" + description: "Pay premiums on personal benefit contract?" + mode: "nullable" + - name: "tfairmrktunuse" + type: "integer" + description: "Fair market value of assets not used for charitable purposes" + mode: "nullable" + - name: "valncharitassets" + type: "integer" + description: "Net value of noncharitable-use assets" + mode: "nullable" + - name: "cmpmininvstret" + type: "integer" + description: "Minimum investment return" + mode: "nullable" + - name: "distribamt" + type: "integer" + description: "Distributable amount" + mode: "nullable" + - name: "undistribincyr" + type: "integer" + description: "Undistributed income" + mode: "nullable" + - name: "adjnetinccola" + type: "integer" + description: "Adjusted net income column a" + mode: "nullable" + - name: "adjnetinccolb" + type: "integer" + description: "Adjusted net income column b" + mode: "nullable" + - name: "adjnetinccolc" + type: "integer" + description: "Adjusted net income column c" + mode: "nullable" + - name: "adjnetinccold" + type: "integer" + description: "Adjusted net income column d" + mode: "nullable" + - name: "adjnetinctot" + type: "integer" + description: "Adjusted net income total" + mode: "nullable" 
+ - name: "qlfydistriba" + type: "integer" + description: "Qualifying distributions column a" + mode: "nullable" + - name: "qlfydistribb" + type: "integer" + description: "Qualifying distributions column b" + mode: "nullable" + - name: "qlfydistribc" + type: "integer" + description: "Qualifying distributions column c" + mode: "nullable" + - name: "qlfydistribd" + type: "integer" + description: "Qualifying distributions column d" + mode: "nullable" + - name: "qlfydistribtot" + type: "integer" + description: "Qualifying distributions total" + mode: "nullable" + - name: "valassetscola" + type: "integer" + description: "Value assets column a" + mode: "nullable" + - name: "valassetscolb" + type: "integer" + description: "Value assets column b" + mode: "nullable" + - name: "valassetscolc" + type: "integer" + description: "Value assets column c" + mode: "nullable" + - name: "valassetscold" + type: "integer" + description: "Value assets column d" + mode: "nullable" + - name: "valassetstot" + type: "integer" + description: "Value assets total" + mode: "nullable" + - name: "qlfyasseta" + type: "integer" + description: "Qualifying assets column a" + mode: "nullable" + - name: "qlfyassetb" + type: "integer" + description: "Qualifying assets column b" + mode: "nullable" + - name: "qlfyassetc" + type: "integer" + description: "Qualifying assets column c" + mode: "nullable" + - name: "qlfyassetd" + type: "integer" + description: "Qualifying assets column d" + mode: "nullable" + - name: "qlfyassettot" + type: "integer" + description: "Qualifying assets total" + mode: "nullable" + - name: "endwmntscola" + type: "integer" + description: "Endowments column a" + mode: "nullable" + - name: "endwmntscolb" + type: "integer" + description: "Endowments column b" + mode: "nullable" + - name: "endwmntscolc" + type: "integer" + description: "Endowments column c" + mode: "nullable" + - name: "endwmntscold" + type: "integer" + description: "Endowments column d" + mode: "nullable" + - name: 
"endwmntstot" + type: "integer" + description: "Endowments total" + mode: "nullable" + - name: "totsuprtcola" + type: "integer" + description: "Total support column a" + mode: "nullable" + - name: "totsuprtcolb" + type: "integer" + description: "Total support column b" + mode: "nullable" + - name: "totsuprtcolc" + type: "integer" + description: "Total support column c" + mode: "nullable" + - name: "totsuprtcold" + type: "integer" + description: "Total support column d" + mode: "nullable" + - name: "totsuprttot" + type: "integer" + description: "Total support total" + mode: "nullable" + - name: "pubsuprtcola" + type: "integer" + description: "Public support column a" + mode: "nullable" + - name: "pubsuprtcolb" + type: "integer" + description: "Public support column b" + mode: "nullable" + - name: "pubsuprtcolc" + type: "integer" + description: "Public support column c" + mode: "nullable" + - name: "pubsuprtcold" + type: "integer" + description: "Public support column d" + mode: "nullable" + - name: "pubsuprttot" + type: "integer" + description: "Public support total" + mode: "nullable" + - name: "grsinvstinca" + type: "integer" + description: "Gross investment income column a" + mode: "nullable" + - name: "grsinvstincb" + type: "integer" + description: "Gross investment income column b" + mode: "nullable" + - name: "grsinvstincc" + type: "integer" + description: "Gross investment income column c" + mode: "nullable" + - name: "grsinvstincd" + type: "integer" + description: "Gross investment income column d" + mode: "nullable" + - name: "grsinvstinctot" + type: "integer" + description: "Gross investment income total" + mode: "nullable" + - name: "grntapprvfut" + type: "integer" + description: "Grants approved for future payment" + mode: "nullable" + - name: "progsrvcacold" + type: "integer" + description: "Program service revenue line 1a (excluded)" + mode: "nullable" + - name: "progsrvcacole" + type: "integer" + description: "Program service revenue line 1a (exempt)" 
+ mode: "nullable" + - name: "progsrvcbcold" + type: "integer" + description: "Program service revenue line 1b (excluded)" + mode: "nullable" + - name: "progsrvcbcole" + type: "integer" + description: "Program service revenue line 1b (exempt)" + mode: "nullable" + - name: "progsrvcccold" + type: "integer" + description: "Program service revenue line 1c (excluded)" + mode: "nullable" + - name: "progsrvcccole" + type: "integer" + description: "Program service revenue line 1c (exempt)" + mode: "nullable" + - name: "progsrvcdcold" + type: "integer" + description: "Program service revenue line 1d (excluded)" + mode: "nullable" + - name: "progsrvcdcole" + type: "integer" + description: "Program service revenue line 1d (exempt)" + mode: "nullable" + - name: "progsrvcecold" + type: "integer" + description: "Program service revenue line 1e (excluded)" + mode: "nullable" + - name: "progsrvcecole" + type: "integer" + description: "Program service revenue line 1e (exempt)" + mode: "nullable" + - name: "progsrvcfcold" + type: "integer" + description: "Program service revenue line 1f (excluded)" + mode: "nullable" + - name: "progsrvcfcole" + type: "integer" + description: "Program service revenue line 1f (exempt)" + mode: "nullable" + - name: "progsrvcgcold" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (excluded)" + mode: "nullable" + - name: "progsrvcgcole" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (exempt)" + mode: "nullable" + - name: "membershpduesd" + type: "integer" + description: "Membership dues and assessments (excluded)" + mode: "nullable" + - name: "membershpduese" + type: "integer" + description: "Membership dues and assessments (exempt)" + mode: "nullable" + - name: "intonsvngsd" + type: "integer" + description: "Interest on savings and temporary cash investments (excluded)" + mode: "nullable" + - name: "intonsvngse" + type: "integer" + description: 
"Interest on savings and temporary cash investments (exempt)" + mode: "nullable" + - name: "dvdndsintd" + type: "integer" + description: "Dividends and interest from securities (excluded)" + mode: "nullable" + - name: "dvdndsinte" + type: "integer" + description: "Dividends and interest from securities (exempt)" + mode: "nullable" + - name: "trnsfrcashcd" + type: "string" + description: "Transfer cash to noncharitable exempt organization?" + mode: "nullable" + - name: "trnsothasstscd" + type: "string" + description: "Transfer other assets to noncharitable exempt organization?" + mode: "nullable" + - name: "salesasstscd" + type: "string" + description: "Sale of assets to noncharitable exempt organization?" + mode: "nullable" + - name: "prchsasstscd" + type: "string" + description: "Purchase of assets from noncharitable exempt organization?" + mode: "nullable" + - name: "rentlsfacltscd" + type: "string" + description: "Rental of facilities or other assets?" + mode: "nullable" + - name: "reimbrsmntscd" + type: "string" + description: "Reimbursements arrangements?" + mode: "nullable" + - name: "loansguarcd" + type: "string" + description: "Loans or other guarantees?" + mode: "nullable" + - name: "perfservicescd" + type: "string" + description: "Performance of services or membership or fundraising solicitations?" + mode: "nullable" + - name: "sharngasstscd" + type: "string" + description: "Sharing of facilities equipment mailing lists other assets or paid employees?" 
+ mode: "nullable" + + + graph_paths: + - "irs_990_pf_2015_transform_csv >> load_irs_990_pf_2015_to_bq" diff --git a/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py new file mode 100644 index 000000000..932e77562 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py @@ -0,0 +1,1143 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_pf_2016", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_pf_2016_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_pf_2016_transform_csv", + startup_timeout_seconds=600, + name="irs_990_pf_2016", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextract990pf.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_pf_2016/data_output.csv", + 
"PIPELINE_NAME": "irs_990_pf_2016", + "CSV_HEADERS": '["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprt
cola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"]', + "RENAME_MAPPINGS": '{"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": 
"invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": 
"undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": 
"trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_pf_2016_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_pf_2016_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_pf_2016/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_pf_2016", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_prd", + "type": "string", + "description": "Tax period (YYYYMM format)", + "mode": "nullable", + }, + { + "name": "eostatus", + "type": "string", + "description": "EO Status Code", + "mode": "nullable", + }, + { + "name": "tax_yr", + "type": "integer", + "description": "SOI Year", + "mode": "nullable", + }, + { + "name": "operatingcd", + "type": "string", + "description": "Operating foundation code", + "mode": "nullable", + }, + { + "name": "subcd", + "type": "string", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "fairmrktvalamt", + "type": "integer", + "description": "Total assets – e-o-y fair market valu", + "mode": "nullable", + }, + { + "name": "grscontrgifts", + "type": "integer", + "description": "Contributions received", + "mode": "nullable", + }, + { + "name": "schedbind", + "type": "string", + "description": "Schedule B indicator", + "mode": "nullable", + }, + { + "name": "intrstrvnue", + "type": "integer", + "description": "Interest 
revenue", + "mode": "nullable", + }, + { + "name": "dividndsamt", + "type": "integer", + "description": "", + "mode": "nullable", + }, + { + "name": "grsrents", + "type": "integer", + "description": "Gross rents", + "mode": "nullable", + }, + { + "name": "grsslspramt", + "type": "integer", + "description": "Gross sales price for assets", + "mode": "nullable", + }, + { + "name": "costsold", + "type": "integer", + "description": "Cost-of-goods-sold", + "mode": "nullable", + }, + { + "name": "grsprofitbus", + "type": "integer", + "description": "Gross profit", + "mode": "nullable", + }, + { + "name": "otherincamt", + "type": "integer", + "description": "Other income", + "mode": "nullable", + }, + { + "name": "totrcptperbks", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "compofficers", + "type": "integer", + "description": "Compensation of officers", + "mode": "nullable", + }, + { + "name": "pensplemplbenf", + "type": "integer", + "description": "Pension plans employee benefits", + "mode": "nullable", + }, + { + "name": "legalfeesamt", + "type": "integer", + "description": "Legal fees", + "mode": "nullable", + }, + { + "name": "accountingfees", + "type": "integer", + "description": "Accounting fees", + "mode": "nullable", + }, + { + "name": "interestamt", + "type": "integer", + "description": "Interest", + "mode": "nullable", + }, + { + "name": "depreciationamt", + "type": "integer", + "description": "Depreciation and depletion", + "mode": "nullable", + }, + { + "name": "occupancyamt", + "type": "integer", + "description": "Occupancy", + "mode": "nullable", + }, + { + "name": "travlconfmtngs", + "type": "integer", + "description": "Travel conferences and meetings", + "mode": "nullable", + }, + { + "name": "printingpubl", + "type": "integer", + "description": "Printing and publications", + "mode": "nullable", + }, + { + "name": "topradmnexpnsa", + "type": "integer", + "description": "Total operating and administrative 
expenses column a", + "mode": "nullable", + }, + { + "name": "contrpdpbks", + "type": "integer", + "description": "Contributions gifts grants paid", + "mode": "nullable", + }, + { + "name": "totexpnspbks", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "excessrcpts", + "type": "integer", + "description": "Net income less deficit", + "mode": "nullable", + }, + { + "name": "totrcptnetinc", + "type": "integer", + "description": "Total receipts net investment income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsb", + "type": "integer", + "description": "Total operating and administrative expenses column b", + "mode": "nullable", + }, + { + "name": "totexpnsnetinc", + "type": "integer", + "description": "Total expenses net investment income", + "mode": "nullable", + }, + { + "name": "netinvstinc", + "type": "integer", + "description": "Net investment income", + "mode": "nullable", + }, + { + "name": "trcptadjnetinc", + "type": "integer", + "description": "Total receipts adjusted net income", + "mode": "nullable", + }, + { + "name": "totexpnsadjnet", + "type": "integer", + "description": "Total expenses adjusted net income", + "mode": "nullable", + }, + { + "name": "adjnetinc", + "type": "integer", + "description": "Adjusted net income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsd", + "type": "integer", + "description": "Total operating and administrative expenses column d", + "mode": "nullable", + }, + { + "name": "totexpnsexempt", + "type": "integer", + "description": "Total expenses – exempt purpose", + "mode": "nullable", + }, + { + "name": "othrcashamt", + "type": "integer", + "description": "Cash non-interest-bearing – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstgovtoblig", + "type": "integer", + "description": "Investments in U.S. 
& state government obligations – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpstk", + "type": "integer", + "description": "Investments in corporate stock – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpbnd", + "type": "integer", + "description": "Investments in corporate bonds– e-o-y book value", + "mode": "nullable", + }, + { + "name": "totinvstsec", + "type": "integer", + "description": "Total investments in securities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgloans", + "type": "integer", + "description": "Investments mortgage loans – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrinvstend", + "type": "integer", + "description": "Other investments – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrassetseoy", + "type": "integer", + "description": "Other assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgnotespay", + "type": "integer", + "description": "Mortgage loans payable – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrliabltseoy", + "type": "integer", + "description": "Other liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "tfundnworth", + "type": "integer", + "description": "Total fund net worth – e-o-y book value", + "mode": "nullable", + }, + { + "name": "fairmrktvaleoy", + "type": "integer", + "description": "Total assets – e-o-y fair market value", + "mode": "nullable", + }, + { + "name": "totexcapgnls", + "type": "integer", + "description": "Capital gain net income", + "mode": "nullable", + }, + { + "name": "totexcapgn", + "type": "integer", + "description": "Net gain – sales of assets", + "mode": "nullable", + }, + { + 
"name": "totexcapls", + "type": "integer", + "description": "Net loss – sales of assets", + "mode": "nullable", + }, + { + "name": "invstexcisetx", + "type": "integer", + "description": "Excise tax on net investment income", + "mode": "nullable", + }, + { + "name": "sec4940notxcd", + "type": "string", + "description": "Section 4940 – no tax", + "mode": "nullable", + }, + { + "name": "sec4940redtxcd", + "type": "string", + "description": "Section 4940 – 1 % tax", + "mode": "nullable", + }, + { + "name": "sect511tx", + "type": "integer", + "description": "Section 511 tax", + "mode": "nullable", + }, + { + "name": "subtitleatx", + "type": "integer", + "description": "Subtitle A tax", + "mode": "nullable", + }, + { + "name": "totaxpyr", + "type": "integer", + "description": "Total excise tax", + "mode": "nullable", + }, + { + "name": "esttaxcr", + "type": "integer", + "description": "Estimated tax credit", + "mode": "nullable", + }, + { + "name": "txwithldsrc", + "type": "integer", + "description": "Tax withheld at source", + "mode": "nullable", + }, + { + "name": "txpaidf2758", + "type": "integer", + "description": "Tax paid with Form 2758 (filing extension)", + "mode": "nullable", + }, + { + "name": "erronbkupwthld", + "type": "integer", + "description": "Erroneous backup withholding credit amount", + "mode": "nullable", + }, + { + "name": "estpnlty", + "type": "integer", + "description": "Estimated tax penalty", + "mode": "nullable", + }, + { + "name": "taxdue", + "type": "integer", + "description": "Tax due", + "mode": "nullable", + }, + { + "name": "overpay", + "type": "integer", + "description": "Overpayment", + "mode": "nullable", + }, + { + "name": "crelamt", + "type": "integer", + "description": "Credit elect amount", + "mode": "nullable", + }, + { + "name": "infleg", + "type": "string", + "description": "Influence legislation?", + "mode": "nullable", + }, + { + "name": "actnotpr", + "type": "string", + "description": "Activities not previously reported?", + 
"mode": "nullable", + }, + { + "name": "chgnprvrptcd", + "type": "string", + "description": "Changes not previously reported?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Filed 990-T?", + "mode": "nullable", + }, + { + "name": "contractncd", + "type": "string", + "description": "Contraction?", + "mode": "nullable", + }, + { + "name": "furnishcpycd", + "type": "string", + "description": "Furnished copy to Attorney General?", + "mode": "nullable", + }, + { + "name": "claimstatcd", + "type": "string", + "description": "Claiming status?", + "mode": "nullable", + }, + { + "name": "cntrbtrstxyrcd", + "type": "string", + "description": "Substantial contributors?", + "mode": "nullable", + }, + { + "name": "distribdafcd", + "type": "string", + "description": "Distribution to donor advised fund with advisory privileges?", + "mode": "nullable", + }, + { + "name": "orgcmplypubcd", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "filedlf1041ind", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "propexchcd", + "type": "string", + "description": "Property exchange?", + "mode": "nullable", + }, + { + "name": "brwlndmnycd", + "type": "string", + "description": "Borrow lend money?", + "mode": "nullable", + }, + { + "name": "furngoodscd", + "type": "string", + "description": "Furnished goods?", + "mode": "nullable", + }, + { + "name": "paidcmpncd", + "type": "string", + "description": "Paid compensation?", + "mode": "nullable", + }, + { + "name": "transfercd", + "type": "string", + "description": "Transfer?", + "mode": "nullable", + }, + { + "name": "agremkpaycd", + "type": "string", + "description": "Agree to make pay?", + "mode": "nullable", + }, + { + "name": "exceptactsind", + "type": "string", + "description": "Acts fail to qualify under section 53.4941(d)-3?", + "mode": "nullable", + }, + { + "name": 
"prioractvcd", + "type": "string", + "description": "Engage in acts in prior year?", + "mode": "nullable", + }, + { + "name": "undistrinccd", + "type": "string", + "description": "Undistributed income?", + "mode": "nullable", + }, + { + "name": "applyprovind", + "type": "string", + "description": "Not applying section 4942(a)(2) provisions?", + "mode": "nullable", + }, + { + "name": "dirindirintcd", + "type": "string", + "description": "Direct indirect interest?", + "mode": "nullable", + }, + { + "name": "excesshldcd", + "type": "string", + "description": "Excess business holdings?", + "mode": "nullable", + }, + { + "name": "invstjexmptcd", + "type": "string", + "description": "Jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "prevjexmptcd", + "type": "string", + "description": "Prior year jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "propgndacd", + "type": "string", + "description": "Propaganda?", + "mode": "nullable", + }, + { + "name": "ipubelectcd", + "type": "string", + "description": "Influence public election?", + "mode": "nullable", + }, + { + "name": "grntindivcd", + "type": "string", + "description": "Grant individual?", + "mode": "nullable", + }, + { + "name": "nchrtygrntcd", + "type": "string", + "description": "Non-charity grant?", + "mode": "nullable", + }, + { + "name": "nreligiouscd", + "type": "string", + "description": "Non-religious?", + "mode": "nullable", + }, + { + "name": "excptransind", + "type": "string", + "description": "Transactions fail to qualify under section 53.4945?", + "mode": "nullable", + }, + { + "name": "rfprsnlbnftind", + "type": "string", + "description": "Receive funds to pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "pyprsnlbnftind", + "type": "string", + "description": "Pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "tfairmrktunuse", + "type": "integer", + "description": "Fair market value of assets 
not used for charitable purposes", + "mode": "nullable", + }, + { + "name": "valncharitassets", + "type": "integer", + "description": "Net value of noncharitable-use assets", + "mode": "nullable", + }, + { + "name": "cmpmininvstret", + "type": "integer", + "description": "Minimum investment return", + "mode": "nullable", + }, + { + "name": "distribamt", + "type": "integer", + "description": "Distributable amount", + "mode": "nullable", + }, + { + "name": "undistribincyr", + "type": "integer", + "description": "Undistributed income", + "mode": "nullable", + }, + { + "name": "adjnetinccola", + "type": "integer", + "description": "Adjusted net income column a", + "mode": "nullable", + }, + { + "name": "adjnetinccolb", + "type": "integer", + "description": "Adjusted net income column b", + "mode": "nullable", + }, + { + "name": "adjnetinccolc", + "type": "integer", + "description": "Adjusted net income column c", + "mode": "nullable", + }, + { + "name": "adjnetinccold", + "type": "integer", + "description": "Adjusted net income column d", + "mode": "nullable", + }, + { + "name": "adjnetinctot", + "type": "integer", + "description": "Adjusted net income total", + "mode": "nullable", + }, + { + "name": "qlfydistriba", + "type": "integer", + "description": "Qualifying distributions column a", + "mode": "nullable", + }, + { + "name": "qlfydistribb", + "type": "integer", + "description": "Qualifying distributions column b", + "mode": "nullable", + }, + { + "name": "qlfydistribc", + "type": "integer", + "description": "Qualifying distributions column c", + "mode": "nullable", + }, + { + "name": "qlfydistribd", + "type": "integer", + "description": "Qualifying distributions column d", + "mode": "nullable", + }, + { + "name": "qlfydistribtot", + "type": "integer", + "description": "Qualifying distributions total", + "mode": "nullable", + }, + { + "name": "valassetscola", + "type": "integer", + "description": "Value assets column a", + "mode": "nullable", + }, + { + "name": 
"valassetscolb", + "type": "integer", + "description": "Value assets column b", + "mode": "nullable", + }, + { + "name": "valassetscolc", + "type": "integer", + "description": "Value assets column c", + "mode": "nullable", + }, + { + "name": "valassetscold", + "type": "integer", + "description": "Value assets column d", + "mode": "nullable", + }, + { + "name": "valassetstot", + "type": "integer", + "description": "Value assets total", + "mode": "nullable", + }, + { + "name": "qlfyasseta", + "type": "integer", + "description": "Qualifying assets column a", + "mode": "nullable", + }, + { + "name": "qlfyassetb", + "type": "integer", + "description": "Qualifying assets column b", + "mode": "nullable", + }, + { + "name": "qlfyassetc", + "type": "integer", + "description": "Qualifying assets column c", + "mode": "nullable", + }, + { + "name": "qlfyassetd", + "type": "integer", + "description": "Qualifying assets column d", + "mode": "nullable", + }, + { + "name": "qlfyassettot", + "type": "integer", + "description": "Qualifying assets total", + "mode": "nullable", + }, + { + "name": "endwmntscola", + "type": "integer", + "description": "Endowments column a", + "mode": "nullable", + }, + { + "name": "endwmntscolb", + "type": "integer", + "description": "Endowments column b", + "mode": "nullable", + }, + { + "name": "endwmntscolc", + "type": "integer", + "description": "Endowments column c", + "mode": "nullable", + }, + { + "name": "endwmntscold", + "type": "integer", + "description": "Endowments column d", + "mode": "nullable", + }, + { + "name": "endwmntstot", + "type": "integer", + "description": "Endowments total", + "mode": "nullable", + }, + { + "name": "totsuprtcola", + "type": "integer", + "description": "Total support column a", + "mode": "nullable", + }, + { + "name": "totsuprtcolb", + "type": "integer", + "description": "Total support column b", + "mode": "nullable", + }, + { + "name": "totsuprtcolc", + "type": "integer", + "description": "Total support column 
c", + "mode": "nullable", + }, + { + "name": "totsuprtcold", + "type": "integer", + "description": "Total support column d", + "mode": "nullable", + }, + { + "name": "totsuprttot", + "type": "integer", + "description": "Total support total", + "mode": "nullable", + }, + { + "name": "pubsuprtcola", + "type": "integer", + "description": "Public support column a", + "mode": "nullable", + }, + { + "name": "pubsuprtcolb", + "type": "integer", + "description": "Public support column b", + "mode": "nullable", + }, + { + "name": "pubsuprtcolc", + "type": "integer", + "description": "Public support column c", + "mode": "nullable", + }, + { + "name": "pubsuprtcold", + "type": "integer", + "description": "Public support column d", + "mode": "nullable", + }, + { + "name": "pubsuprttot", + "type": "integer", + "description": "Public support total", + "mode": "nullable", + }, + { + "name": "grsinvstinca", + "type": "integer", + "description": "Gross investment income column a", + "mode": "nullable", + }, + { + "name": "grsinvstincb", + "type": "integer", + "description": "Gross investment income column b", + "mode": "nullable", + }, + { + "name": "grsinvstincc", + "type": "integer", + "description": "Gross investment income column c", + "mode": "nullable", + }, + { + "name": "grsinvstincd", + "type": "integer", + "description": "Gross investment income column d", + "mode": "nullable", + }, + { + "name": "grsinvstinctot", + "type": "integer", + "description": "Gross investment income total", + "mode": "nullable", + }, + { + "name": "grntapprvfut", + "type": "integer", + "description": "Grants approved for future payment", + "mode": "nullable", + }, + { + "name": "progsrvcacold", + "type": "integer", + "description": "Program service revenue line 1a (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcacole", + "type": "integer", + "description": "Program service revenue line 1a (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcbcold", + "type": "integer", + 
"description": "Program service revenue line 1b (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcbcole", + "type": "integer", + "description": "Program service revenue line 1b (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcccold", + "type": "integer", + "description": "Program service revenue line 1c (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcccole", + "type": "integer", + "description": "Program service revenue line 1c (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcdcold", + "type": "integer", + "description": "Program service revenue line 1d (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcdcole", + "type": "integer", + "description": "Program service revenue line 1d (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcecold", + "type": "integer", + "description": "Program service revenue line 1e (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcecole", + "type": "integer", + "description": "Program service revenue line 1e (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcfcold", + "type": "integer", + "description": "Program service revenue line 1f (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcfcole", + "type": "integer", + "description": "Program service revenue line 1f (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcgcold", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcgcole", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (exempt)", + "mode": "nullable", + }, + { + "name": "membershpduesd", + "type": "integer", + "description": "Membership dues and assessments (excluded)", + "mode": "nullable", + }, + { + "name": "membershpduese", + "type": "integer", + "description": "Membership dues and assessments (exempt)", + "mode": "nullable", + }, + { + 
"name": "intonsvngsd", + "type": "integer", + "description": "Interest on savings and temporary cash investments (excluded)", + "mode": "nullable", + }, + { + "name": "intonsvngse", + "type": "integer", + "description": "Interest on savings and temporary cash investments (exempt)", + "mode": "nullable", + }, + { + "name": "dvdndsintd", + "type": "integer", + "description": "Dividends and interest from securities (excluded)", + "mode": "nullable", + }, + { + "name": "dvdndsinte", + "type": "integer", + "description": "Dividends and interest from securities (exempt)", + "mode": "nullable", + }, + { + "name": "trnsfrcashcd", + "type": "string", + "description": "Transfer cash to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "trnsothasstscd", + "type": "string", + "description": "Transfer other assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "salesasstscd", + "type": "string", + "description": "Sale of assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "prchsasstscd", + "type": "string", + "description": "Purchase of assets from noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "rentlsfacltscd", + "type": "string", + "description": "Rental of facilities or other assets?", + "mode": "nullable", + }, + { + "name": "reimbrsmntscd", + "type": "string", + "description": "Reimbursements arrangements?", + "mode": "nullable", + }, + { + "name": "loansguarcd", + "type": "string", + "description": "Loans or other guarantees?", + "mode": "nullable", + }, + { + "name": "perfservicescd", + "type": "string", + "description": "Performance of services or membership or fundraising solicitations?", + "mode": "nullable", + }, + { + "name": "sharngasstscd", + "type": "string", + "description": "Sharing of facilities equipment mailing lists other assets or paid employees?", + "mode": "nullable", + }, + ], + ) + + irs_990_pf_2016_transform_csv >> 
load_irs_990_pf_2016_to_bq diff --git a/datasets/irs_990/irs_990_pf_2016/pipeline.yaml b/datasets/irs_990/irs_990_pf_2016/pipeline.yaml new file mode 100644 index 000000000..014c7f0a5 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2016/pipeline.yaml @@ -0,0 +1,828 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_pf_2016 + + # Description of the table + description: "IRS 990 PF 2016 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_pf_2016 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_pf_2016_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_pf_2016" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. 
use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/16eofinextract990pf.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_pf_2016/data_output.csv" + PIPELINE_NAME: "irs_990_pf_2016" + CSV_HEADERS: >- + ["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd",
"distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"] + RENAME_MAPPINGS: >- + {"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": 
"otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": 
"orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": 
"grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_pf_2016_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_pf_2016/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_pf_2016" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_prd" + type: "string" + description: "Tax period (YYYYMM format)" + mode: "nullable" + - name: "eostatus" + type: "string" + description: "EO Status Code" + mode: "nullable" + - name: "tax_yr" + type: "integer" + description: "SOI Year" + mode: "nullable" + - name: "operatingcd" + type: "string" + description: "Operating foundation code" + mode: "nullable" + - name: "subcd" + type: "string" + description: "Subsection code" + mode: "nullable" + - name: "fairmrktvalamt" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "grscontrgifts" + type: "integer" + description: "Contributions received" + mode: "nullable" + - name: "schedbind" + type: "string" + description: "Schedule B indicator" + mode: "nullable" + - name: "intrstrvnue" + type: "integer" + description: "Interest revenue" + mode: "nullable" + - name: "dividndsamt" + type: "integer" + description: "" + mode: "nullable" + - name: "grsrents" + type: "integer" + description: "Gross 
rents" + mode: "nullable" + - name: "grsslspramt" + type: "integer" + description: "Gross sales price for assets" + mode: "nullable" + - name: "costsold" + type: "integer" + description: "Cost-of-goods-sold" + mode: "nullable" + - name: "grsprofitbus" + type: "integer" + description: "Gross profit" + mode: "nullable" + - name: "otherincamt" + type: "integer" + description: "Other income" + mode: "nullable" + - name: "totrcptperbks" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "compofficers" + type: "integer" + description: "Compensation of officers" + mode: "nullable" + - name: "pensplemplbenf" + type: "integer" + description: "Pension plans employee benefits" + mode: "nullable" + - name: "legalfeesamt" + type: "integer" + description: "Legal fees" + mode: "nullable" + - name: "accountingfees" + type: "integer" + description: "Accounting fees" + mode: "nullable" + - name: "interestamt" + type: "integer" + description: "Interest" + mode: "nullable" + - name: "depreciationamt" + type: "integer" + description: "Depreciation and depletion" + mode: "nullable" + - name: "occupancyamt" + type: "integer" + description: "Occupancy" + mode: "nullable" + - name: "travlconfmtngs" + type: "integer" + description: "Travel conferences and meetings" + mode: "nullable" + - name: "printingpubl" + type: "integer" + description: "Printing and publications" + mode: "nullable" + - name: "topradmnexpnsa" + type: "integer" + description: "Total operating and administrative expenses column a" + mode: "nullable" + - name: "contrpdpbks" + type: "integer" + description: "Contributions gifts grants paid" + mode: "nullable" + - name: "totexpnspbks" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "excessrcpts" + type: "integer" + description: "Net income less deficit" + mode: "nullable" + - name: "totrcptnetinc" + type: "integer" + description: "Total receipts net investment income" + mode: "nullable" + - name: "topradmnexpnsb" + 
type: "integer" + description: "Total operating and administrative expenses column b" + mode: "nullable" + - name: "totexpnsnetinc" + type: "integer" + description: "Total expenses net investment income" + mode: "nullable" + - name: "netinvstinc" + type: "integer" + description: "Net investment income" + mode: "nullable" + - name: "trcptadjnetinc" + type: "integer" + description: "Total receipts adjusted net income" + mode: "nullable" + - name: "totexpnsadjnet" + type: "integer" + description: "Total expenses adjusted net income" + mode: "nullable" + - name: "adjnetinc" + type: "integer" + description: "Adjusted net income" + mode: "nullable" + - name: "topradmnexpnsd" + type: "integer" + description: "Total operating and administrative expenses column d" + mode: "nullable" + - name: "totexpnsexempt" + type: "integer" + description: "Total expenses – exempt purpose" + mode: "nullable" + - name: "othrcashamt" + type: "integer" + description: "Cash non-interest-bearing – e-o-y book value" + mode: "nullable" + - name: "invstgovtoblig" + type: "integer" + description: "Investments in U.S. 
& state government obligations – e-o-y book value" + mode: "nullable" + - name: "invstcorpstk" + type: "integer" + description: "Investments in corporate stock – e-o-y book value" + mode: "nullable" + - name: "invstcorpbnd" + type: "integer" + description: "Investments in corporate bonds– e-o-y book value" + mode: "nullable" + - name: "totinvstsec" + type: "integer" + description: "Total investments in securities – e-o-y book value" + mode: "nullable" + - name: "mrtgloans" + type: "integer" + description: "Investments mortgage loans – e-o-y book value" + mode: "nullable" + - name: "othrinvstend" + type: "integer" + description: "Other investments – e-o-y book value" + mode: "nullable" + - name: "othrassetseoy" + type: "integer" + description: "Other assets – e-o-y book value" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets – e-o-y book value" + mode: "nullable" + - name: "mrtgnotespay" + type: "integer" + description: "Mortgage loans payable – e-o-y book value" + mode: "nullable" + - name: "othrliabltseoy" + type: "integer" + description: "Other liabilities – e-o-y book value" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities – e-o-y book value" + mode: "nullable" + - name: "tfundnworth" + type: "integer" + description: "Total fund net worth – e-o-y book value" + mode: "nullable" + - name: "fairmrktvaleoy" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "totexcapgnls" + type: "integer" + description: "Capital gain net income" + mode: "nullable" + - name: "totexcapgn" + type: "integer" + description: "Net gain – sales of assets" + mode: "nullable" + - name: "totexcapls" + type: "integer" + description: "Net loss – sales of assets" + mode: "nullable" + - name: "invstexcisetx" + type: "integer" + description: "Excise tax on net investment income" + mode: "nullable" + - name: "sec4940notxcd" + type: "string" + description: 
"Section 4940 – no tax" + mode: "nullable" + - name: "sec4940redtxcd" + type: "string" + description: "Section 4940 – 1 % tax" + mode: "nullable" + - name: "sect511tx" + type: "integer" + description: "Section 511 tax" + mode: "nullable" + - name: "subtitleatx" + type: "integer" + description: "Subtitle A tax" + mode: "nullable" + - name: "totaxpyr" + type: "integer" + description: "Total excise tax" + mode: "nullable" + - name: "esttaxcr" + type: "integer" + description: "Estimated tax credit" + mode: "nullable" + - name: "txwithldsrc" + type: "integer" + description: "Tax withheld at source" + mode: "nullable" + - name: "txpaidf2758" + type: "integer" + description: "Tax paid with Form 2758 (filing extension)" + mode: "nullable" + - name: "erronbkupwthld" + type: "integer" + description: "Erroneous backup withholding credit amount" + mode: "nullable" + - name: "estpnlty" + type: "integer" + description: "Estimated tax penalty" + mode: "nullable" + - name: "taxdue" + type: "integer" + description: "Tax due" + mode: "nullable" + - name: "overpay" + type: "integer" + description: "Overpayment" + mode: "nullable" + - name: "crelamt" + type: "integer" + description: "Credit elect amount" + mode: "nullable" + - name: "infleg" + type: "string" + description: "Influence legislation?" + mode: "nullable" + - name: "actnotpr" + type: "string" + description: "Activities not previously reported?" + mode: "nullable" + - name: "chgnprvrptcd" + type: "string" + description: "Changes not previously reported?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Filed 990-T?" + mode: "nullable" + - name: "contractncd" + type: "string" + description: "Contraction?" + mode: "nullable" + - name: "furnishcpycd" + type: "string" + description: "Furnished copy to Attorney General?" + mode: "nullable" + - name: "claimstatcd" + type: "string" + description: "Claiming status?" 
+ mode: "nullable" + - name: "cntrbtrstxyrcd" + type: "string" + description: "Substantial contributors?" + mode: "nullable" + - name: "distribdafcd" + type: "string" + description: "Distribution to donor advised fund with advisory privileges?" + mode: "nullable" + - name: "orgcmplypubcd" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "filedlf1041ind" + type: "string" + description: "Filed in lieu of Form 1041?" + mode: "nullable" + - name: "propexchcd" + type: "string" + description: "Property exchange?" + mode: "nullable" + - name: "brwlndmnycd" + type: "string" + description: "Borrow lend money?" + mode: "nullable" + - name: "furngoodscd" + type: "string" + description: "Furnished goods?" + mode: "nullable" + - name: "paidcmpncd" + type: "string" + description: "Paid compensation?" + mode: "nullable" + - name: "transfercd" + type: "string" + description: "Transfer?" + mode: "nullable" + - name: "agremkpaycd" + type: "string" + description: "Agree to make pay?" + mode: "nullable" + - name: "exceptactsind" + type: "string" + description: "Acts fail to qualify under section 53.4941(d)-3?" + mode: "nullable" + - name: "prioractvcd" + type: "string" + description: "Engage in acts in prior year?" + mode: "nullable" + - name: "undistrinccd" + type: "string" + description: "Undistributed income?" + mode: "nullable" + - name: "applyprovind" + type: "string" + description: "Not applying section 4942(a)(2) provisions?" + mode: "nullable" + - name: "dirindirintcd" + type: "string" + description: "Direct indirect interest?" + mode: "nullable" + - name: "excesshldcd" + type: "string" + description: "Excess business holdings?" + mode: "nullable" + - name: "invstjexmptcd" + type: "string" + description: "Jeopardizing investments?" + mode: "nullable" + - name: "prevjexmptcd" + type: "string" + description: "Prior year jeopardizing investments?" 
+ mode: "nullable" + - name: "propgndacd" + type: "string" + description: "Propaganda?" + mode: "nullable" + - name: "ipubelectcd" + type: "string" + description: "Influence public election?" + mode: "nullable" + - name: "grntindivcd" + type: "string" + description: "Grant individual?" + mode: "nullable" + - name: "nchrtygrntcd" + type: "string" + description: "Non-charity grant?" + mode: "nullable" + - name: "nreligiouscd" + type: "string" + description: "Non-religious?" + mode: "nullable" + - name: "excptransind" + type: "string" + description: "Transactions fail to qualify under section 53.4945?" + mode: "nullable" + - name: "rfprsnlbnftind" + type: "string" + description: "Receive funds to pay premiums on personal benefit contract?" + mode: "nullable" + - name: "pyprsnlbnftind" + type: "string" + description: "Pay premiums on personal benefit contract?" + mode: "nullable" + - name: "tfairmrktunuse" + type: "integer" + description: "Fair market value of assets not used for charitable purposes" + mode: "nullable" + - name: "valncharitassets" + type: "integer" + description: "Net value of noncharitable-use assets" + mode: "nullable" + - name: "cmpmininvstret" + type: "integer" + description: "Minimum investment return" + mode: "nullable" + - name: "distribamt" + type: "integer" + description: "Distributable amount" + mode: "nullable" + - name: "undistribincyr" + type: "integer" + description: "Undistributed income" + mode: "nullable" + - name: "adjnetinccola" + type: "integer" + description: "Adjusted net income column a" + mode: "nullable" + - name: "adjnetinccolb" + type: "integer" + description: "Adjusted net income column b" + mode: "nullable" + - name: "adjnetinccolc" + type: "integer" + description: "Adjusted net income column c" + mode: "nullable" + - name: "adjnetinccold" + type: "integer" + description: "Adjusted net income column d" + mode: "nullable" + - name: "adjnetinctot" + type: "integer" + description: "Adjusted net income total" + mode: "nullable" 
+ - name: "qlfydistriba" + type: "integer" + description: "Qualifying distributions column a" + mode: "nullable" + - name: "qlfydistribb" + type: "integer" + description: "Qualifying distributions column b" + mode: "nullable" + - name: "qlfydistribc" + type: "integer" + description: "Qualifying distributions column c" + mode: "nullable" + - name: "qlfydistribd" + type: "integer" + description: "Qualifying distributions column d" + mode: "nullable" + - name: "qlfydistribtot" + type: "integer" + description: "Qualifying distributions total" + mode: "nullable" + - name: "valassetscola" + type: "integer" + description: "Value assets column a" + mode: "nullable" + - name: "valassetscolb" + type: "integer" + description: "Value assets column b" + mode: "nullable" + - name: "valassetscolc" + type: "integer" + description: "Value assets column c" + mode: "nullable" + - name: "valassetscold" + type: "integer" + description: "Value assets column d" + mode: "nullable" + - name: "valassetstot" + type: "integer" + description: "Value assets total" + mode: "nullable" + - name: "qlfyasseta" + type: "integer" + description: "Qualifying assets column a" + mode: "nullable" + - name: "qlfyassetb" + type: "integer" + description: "Qualifying assets column b" + mode: "nullable" + - name: "qlfyassetc" + type: "integer" + description: "Qualifying assets column c" + mode: "nullable" + - name: "qlfyassetd" + type: "integer" + description: "Qualifying assets column d" + mode: "nullable" + - name: "qlfyassettot" + type: "integer" + description: "Qualifying assets total" + mode: "nullable" + - name: "endwmntscola" + type: "integer" + description: "Endowments column a" + mode: "nullable" + - name: "endwmntscolb" + type: "integer" + description: "Endowments column b" + mode: "nullable" + - name: "endwmntscolc" + type: "integer" + description: "Endowments column c" + mode: "nullable" + - name: "endwmntscold" + type: "integer" + description: "Endowments column d" + mode: "nullable" + - name: 
"endwmntstot" + type: "integer" + description: "Endowments total" + mode: "nullable" + - name: "totsuprtcola" + type: "integer" + description: "Total support column a" + mode: "nullable" + - name: "totsuprtcolb" + type: "integer" + description: "Total support column b" + mode: "nullable" + - name: "totsuprtcolc" + type: "integer" + description: "Total support column c" + mode: "nullable" + - name: "totsuprtcold" + type: "integer" + description: "Total support column d" + mode: "nullable" + - name: "totsuprttot" + type: "integer" + description: "Total support total" + mode: "nullable" + - name: "pubsuprtcola" + type: "integer" + description: "Public support column a" + mode: "nullable" + - name: "pubsuprtcolb" + type: "integer" + description: "Public support column b" + mode: "nullable" + - name: "pubsuprtcolc" + type: "integer" + description: "Public support column c" + mode: "nullable" + - name: "pubsuprtcold" + type: "integer" + description: "Public support column d" + mode: "nullable" + - name: "pubsuprttot" + type: "integer" + description: "Public support total" + mode: "nullable" + - name: "grsinvstinca" + type: "integer" + description: "Gross investment income column a" + mode: "nullable" + - name: "grsinvstincb" + type: "integer" + description: "Gross investment income column b" + mode: "nullable" + - name: "grsinvstincc" + type: "integer" + description: "Gross investment income column c" + mode: "nullable" + - name: "grsinvstincd" + type: "integer" + description: "Gross investment income column d" + mode: "nullable" + - name: "grsinvstinctot" + type: "integer" + description: "Gross investment income total" + mode: "nullable" + - name: "grntapprvfut" + type: "integer" + description: "Grants approved for future payment" + mode: "nullable" + - name: "progsrvcacold" + type: "integer" + description: "Program service revenue line 1a (excluded)" + mode: "nullable" + - name: "progsrvcacole" + type: "integer" + description: "Program service revenue line 1a (exempt)" 
+ mode: "nullable" + - name: "progsrvcbcold" + type: "integer" + description: "Program service revenue line 1b (excluded)" + mode: "nullable" + - name: "progsrvcbcole" + type: "integer" + description: "Program service revenue line 1b (exempt)" + mode: "nullable" + - name: "progsrvcccold" + type: "integer" + description: "Program service revenue line 1c (excluded)" + mode: "nullable" + - name: "progsrvcccole" + type: "integer" + description: "Program service revenue line 1c (exempt)" + mode: "nullable" + - name: "progsrvcdcold" + type: "integer" + description: "Program service revenue line 1d (excluded)" + mode: "nullable" + - name: "progsrvcdcole" + type: "integer" + description: "Program service revenue line 1d (exempt)" + mode: "nullable" + - name: "progsrvcecold" + type: "integer" + description: "Program service revenue line 1e (excluded)" + mode: "nullable" + - name: "progsrvcecole" + type: "integer" + description: "Program service revenue line 1e (exempt)" + mode: "nullable" + - name: "progsrvcfcold" + type: "integer" + description: "Program service revenue line 1f (excluded)" + mode: "nullable" + - name: "progsrvcfcole" + type: "integer" + description: "Program service revenue line 1f (exempt)" + mode: "nullable" + - name: "progsrvcgcold" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (excluded)" + mode: "nullable" + - name: "progsrvcgcole" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (exempt)" + mode: "nullable" + - name: "membershpduesd" + type: "integer" + description: "Membership dues and assessments (excluded)" + mode: "nullable" + - name: "membershpduese" + type: "integer" + description: "Membership dues and assessments (exempt)" + mode: "nullable" + - name: "intonsvngsd" + type: "integer" + description: "Interest on savings and temporary cash investments (excluded)" + mode: "nullable" + - name: "intonsvngse" + type: "integer" + description: 
"Interest on savings and temporary cash investments (exempt)" + mode: "nullable" + - name: "dvdndsintd" + type: "integer" + description: "Dividends and interest from securities (excluded)" + mode: "nullable" + - name: "dvdndsinte" + type: "integer" + description: "Dividends and interest from securities (exempt)" + mode: "nullable" + - name: "trnsfrcashcd" + type: "string" + description: "Transfer cash to noncharitable exempt organization?" + mode: "nullable" + - name: "trnsothasstscd" + type: "string" + description: "Transfer other assets to noncharitable exempt organization?" + mode: "nullable" + - name: "salesasstscd" + type: "string" + description: "Sale of assets to noncharitable exempt organization?" + mode: "nullable" + - name: "prchsasstscd" + type: "string" + description: "Purchase of assets from noncharitable exempt organization?" + mode: "nullable" + - name: "rentlsfacltscd" + type: "string" + description: "Rental of facilities or other assets?" + mode: "nullable" + - name: "reimbrsmntscd" + type: "string" + description: "Reimbursements arrangements?" + mode: "nullable" + - name: "loansguarcd" + type: "string" + description: "Loans or other guarantees?" + mode: "nullable" + - name: "perfservicescd" + type: "string" + description: "Performance of services or membership or fundraising solicitations?" + mode: "nullable" + - name: "sharngasstscd" + type: "string" + description: "Sharing of facilities equipment mailing lists other assets or paid employees?" + mode: "nullable" + + + graph_paths: + - "irs_990_pf_2016_transform_csv >> load_irs_990_pf_2016_to_bq"