From aa9da6cd422799d8c8cd4c461f41e9256fdf8b9b Mon Sep 17 00:00:00 2001 From: Dipannita Banerjee Date: Wed, 25 Aug 2021 12:09:54 +0000 Subject: [PATCH 1/4] feat: Onboard IRS 990 dataset --- .../_images/run_csv_transform_kub/Dockerfile | 38 + .../_images/run_csv_transform_kub/Pipfile | 13 + .../run_csv_transform_kub/csv_transform.py | 169 +++ .../run_csv_transform_kub/requirements.txt | 4 + .../_terraform/irs_990_2014_pipeline.tf | 39 + .../_terraform/irs_990_2015_pipeline.tf | 39 + .../_terraform/irs_990_2016_pipeline.tf | 39 + .../_terraform/irs_990_2017_pipeline.tf | 39 + .../irs_990/_terraform/irs_990_dataset.tf | 26 + .../_terraform/irs_990_ez_2014_pipeline.tf | 39 + .../_terraform/irs_990_ez_2015_pipeline.tf | 39 + .../_terraform/irs_990_ez_2016_pipeline.tf | 39 + .../_terraform/irs_990_ez_2017_pipeline.tf | 39 + .../_terraform/irs_990_pf_2014_pipeline.tf | 39 + .../_terraform/irs_990_pf_2015_pipeline.tf | 39 + .../_terraform/irs_990_pf_2016_pipeline.tf | 39 + datasets/irs_990/_terraform/provider.tf | 28 + datasets/irs_990/_terraform/variables.tf | 23 + datasets/irs_990/dataset.yaml | 58 + .../irs_990/irs_990_2014/irs_990_2014_dag.py | 314 +++++ datasets/irs_990/irs_990_2014/pipeline.yaml | 846 ++++++++++++ .../irs_990/irs_990_2015/irs_990_2015_dag.py | 315 +++++ datasets/irs_990/irs_990_2015/pipeline.yaml | 847 ++++++++++++ .../irs_990/irs_990_2016/irs_990_2016_dag.py | 315 +++++ datasets/irs_990/irs_990_2016/pipeline.yaml | 853 ++++++++++++ .../irs_990/irs_990_2017/irs_990_2017_dag.py | 315 +++++ datasets/irs_990/irs_990_2017/pipeline.yaml | 854 ++++++++++++ .../irs_990_ez_2014/irs_990_ez_2014_dag.py | 495 +++++++ .../irs_990/irs_990_ez_2014/pipeline.yaml | 402 ++++++ .../irs_990_ez_2015/irs_990_ez_2015_dag.py | 501 ++++++++ .../irs_990/irs_990_ez_2015/pipeline.yaml | 406 ++++++ .../irs_990_ez_2016/irs_990_ez_2016_dag.py | 501 ++++++++ .../irs_990/irs_990_ez_2016/pipeline.yaml | 406 ++++++ .../irs_990_ez_2017/irs_990_ez_2017_dag.py | 501 ++++++++ 
.../irs_990/irs_990_ez_2017/pipeline.yaml | 406 ++++++ .../irs_990_pf_2014/irs_990_pf_2014_dag.py | 1137 ++++++++++++++++ .../irs_990/irs_990_pf_2014/pipeline.yaml | 828 ++++++++++++ .../irs_990_pf_2015/irs_990_pf_2015_dag.py | 1143 ++++++++++++++++ .../irs_990/irs_990_pf_2015/pipeline.yaml | 832 ++++++++++++ .../irs_990_pf_2016/irs_990_pf_2016_dag.py | 1145 +++++++++++++++++ .../irs_990/irs_990_pf_2016/pipeline.yaml | 833 ++++++++++++ 41 files changed, 14983 insertions(+) create mode 100644 datasets/irs_990/_images/run_csv_transform_kub/Dockerfile create mode 100644 datasets/irs_990/_images/run_csv_transform_kub/Pipfile create mode 100644 datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py create mode 100644 datasets/irs_990/_images/run_csv_transform_kub/requirements.txt create mode 100644 datasets/irs_990/_terraform/irs_990_2014_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_2015_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_2016_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_2017_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_dataset.tf create mode 100644 datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf create mode 100644 datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf create mode 100644 datasets/irs_990/_terraform/provider.tf create mode 100644 datasets/irs_990/_terraform/variables.tf create mode 100644 datasets/irs_990/dataset.yaml create mode 100644 datasets/irs_990/irs_990_2014/irs_990_2014_dag.py create mode 100644 datasets/irs_990/irs_990_2014/pipeline.yaml create mode 100644 
datasets/irs_990/irs_990_2015/irs_990_2015_dag.py create mode 100644 datasets/irs_990/irs_990_2015/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_2016/irs_990_2016_dag.py create mode 100644 datasets/irs_990/irs_990_2016/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_2017/irs_990_2017_dag.py create mode 100644 datasets/irs_990/irs_990_2017/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py create mode 100644 datasets/irs_990/irs_990_ez_2014/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py create mode 100644 datasets/irs_990/irs_990_ez_2015/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py create mode 100644 datasets/irs_990/irs_990_ez_2016/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py create mode 100644 datasets/irs_990/irs_990_ez_2017/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py create mode 100644 datasets/irs_990/irs_990_pf_2014/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py create mode 100644 datasets/irs_990/irs_990_pf_2015/pipeline.yaml create mode 100644 datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py create mode 100644 datasets/irs_990/irs_990_pf_2016/pipeline.yaml diff --git a/datasets/irs_990/_images/run_csv_transform_kub/Dockerfile b/datasets/irs_990/_images/run_csv_transform_kub/Dockerfile new file mode 100644 index 000000000..85af90570 --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/Dockerfile @@ -0,0 +1,38 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The base image for this build +# FROM gcr.io/google.com/cloudsdktool/cloud-sdk:slim +FROM python:3.8 + +# Allow statements and log messages to appear in Cloud logs +ENV PYTHONUNBUFFERED True + +# Copy the requirements file into the image +COPY requirements.txt ./ + +# Install the packages specified in the requirements file +RUN python3 -m pip install --no-cache-dir -r requirements.txt + +# The WORKDIR instruction sets the working directory for any RUN, CMD, +# ENTRYPOINT, COPY and ADD instructions that follow it in the Dockerfile. +# If the WORKDIR doesn’t exist, it will be created even if it’s not used in +# any subsequent Dockerfile instruction +WORKDIR /custom + +# Copy the specific data processing script/s in the image under /custom/* +COPY ./csv_transform.py . 
+ +# Command to run the data processing script when the container is run +CMD ["python3", "csv_transform.py"] diff --git a/datasets/irs_990/_images/run_csv_transform_kub/Pipfile b/datasets/irs_990/_images/run_csv_transform_kub/Pipfile new file mode 100644 index 000000000..37f9797d3 --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +requests = "*" +vaex = "*" + +[dev-packages] + +[requires] +python_version = "3.9" diff --git a/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py b/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py new file mode 100644 index 000000000..cbdbb8c0c --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py @@ -0,0 +1,169 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import datetime +import json +import logging +import math +import os +import pathlib +import re +import typing +from urllib.parse import urlparse + +import pandas as pd + +# import numpy as np +import requests +from google.cloud import storage + + +def main( + source_url: str, + source_file: pathlib.Path, + target_file: pathlib.Path, + target_gcs_bucket: str, + target_gcs_path: str, + headers: typing.List[str], + rename_mappings: dict, + pipeline_name: str, +): + + logging.info( + f"irs 990 {pipeline_name} process started at " + + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + ) + + logging.info("creating 'files' folder") + pathlib.Path("./files").mkdir(parents=True, exist_ok=True) + + logging.info(f"Downloading file from {source_url}... ") + download_file(source_url, source_file) + + # open the input file + logging.info(f"Opening file {source_file}... ") + + str_value = os.path.basename(urlparse(source_url).path) + + if re.search("zip", str_value): + df = pd.read_csv( + str(source_file), compression="zip", encoding="utf-8", sep=r"\s+" + ) + else: + df = pd.read_csv(str(source_file), encoding="utf-8", sep=r"\s+") + + # steps in the pipeline + logging.info(f"Transforming.. {source_file}") + + logging.info(f"Transform: Rename columns.. {source_file}") + + rename_headers(df, rename_mappings) + + logging.info(f"Transform: filtering null values.. {source_file}") + + filter_null_rows(df) + + # logging.info("Transform: Converting to integr.. ") + + # df["totsupp509"] = df["totsupp509"].apply(convert_to_int) + + logging.info(f"Transform: converting to integer.. {source_file}") + + if re.search("pf", pipeline_name): + df.invstexcisetx = df.invstexcisetx.replace("N", 0) + df.crelamt = df.crelamt.replace("N", 0) + df.dvdndsinte = df.dvdndsinte.replace("N", 0) + df.intrstrvnue = df.intrstrvnue.replace("N", 0) + else: + df["totsupp509"] = df["totsupp509"].apply(convert_to_int) + + logging.info( + f"Transform: Reordering headers for.. 
{os.path.basename(urlparse(source_url).path)}" + ) + + df = df[headers] + + # save to output file + logging.info(f"Saving to output file.. {target_file}") + try: + save_to_new_file(df, file_path=str(target_file)) + except Exception as e: + logging.error(f"Error saving output file: {e}.") + + # upload to GCS + logging.info( + f"Uploading output file to.. gs://{target_gcs_bucket}/{target_gcs_path}" + ) + upload_file_to_gcs(target_file, target_gcs_bucket, target_gcs_path) + + logging.info( + f"irs 990 {pipeline_name} process completed at " + + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + ) + + +def rename_headers(df, rename_mappings): + df = df.rename(columns=rename_mappings, inplace=True) + + +def filter_null_rows(df): + df = df[df.ein != ""] + + +def save_to_new_file(df, file_path): + # df.export_csv(file_path) + df.to_csv(file_path, index=False) + + +def download_file(source_url: str, source_file: pathlib.Path): + logging.info(f"Downloading {source_url} into {source_file}") + r = requests.get(source_url, stream=True) + if r.status_code == 200: + with open(source_file, "wb") as f: + for chunk in r: + f.write(chunk) + else: + logging.error(f"Couldn't download {source_url}: {r.text}") + + +def convert_to_int(input: str) -> str: + str_val = "" + if input == "" or (math.isnan(input)): + str_val = "" + else: + str_val = str(int(round(input, 0))) + return str_val + + +def upload_file_to_gcs(file_path: pathlib.Path, gcs_bucket: str, gcs_path: str) -> None: + storage_client = storage.Client() + bucket = storage_client.bucket(gcs_bucket) + blob = bucket.blob(gcs_path) + blob.upload_from_filename(file_path) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + + main( + source_url=os.environ["SOURCE_URL"], + source_file=pathlib.Path(os.environ["SOURCE_FILE"]).expanduser(), + target_file=pathlib.Path(os.environ["TARGET_FILE"]).expanduser(), + target_gcs_bucket=os.environ["TARGET_GCS_BUCKET"], + 
target_gcs_path=os.environ["TARGET_GCS_PATH"], + headers=json.loads(os.environ["CSV_HEADERS"]), + rename_mappings=json.loads(os.environ["RENAME_MAPPINGS"]), + pipeline_name=os.environ["PIPELINE_NAME"], + ) diff --git a/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt b/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt new file mode 100644 index 000000000..ecd275f68 --- /dev/null +++ b/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt @@ -0,0 +1,4 @@ +requests +vaex +google-cloud-storage +pandas diff --git a/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf new file mode 100644 index 000000000..88937f00f --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_2014" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_2014" + + description = "irs_990 2014 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_2014-table_id" { + value = google_bigquery_table.irs_990_2014.table_id +} + +output "bigquery_table-irs_990_2014-id" { + value = google_bigquery_table.irs_990_2014.id +} diff --git a/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf new file mode 100644 index 000000000..3ee8b4c57 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_2015" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_2015" + + description = "irs_990 2015 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_2015-table_id" { + value = google_bigquery_table.irs_990_2015.table_id +} + +output "bigquery_table-irs_990_2015-id" { + value = google_bigquery_table.irs_990_2015.id +} diff --git a/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf new file mode 100644 index 000000000..d88ea75b8 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_2016" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_2016" + + description = "irs_990_2016 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_2016-table_id" { + value = google_bigquery_table.irs_990_2016.table_id +} + +output "bigquery_table-irs_990_2016-id" { + value = google_bigquery_table.irs_990_2016.id +} diff --git a/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf new file mode 100644 index 000000000..7be2429de --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_2017" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_2017" + + description = "irs_990_2017 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_2017-table_id" { + value = google_bigquery_table.irs_990_2017.table_id +} + +output "bigquery_table-irs_990_2017-id" { + value = google_bigquery_table.irs_990_2017.id +} diff --git a/datasets/irs_990/_terraform/irs_990_dataset.tf b/datasets/irs_990/_terraform/irs_990_dataset.tf new file mode 100644 index 000000000..0f847c3b3 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_dataset.tf @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +resource "google_bigquery_dataset" "irs_990" { + dataset_id = "irs_990" + project = var.project_id + description = "irs_990" +} + +output "bigquery_dataset-irs_990-dataset_id" { + value = google_bigquery_dataset.irs_990.dataset_id +} diff --git a/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf new file mode 100644 index 000000000..3cae56f31 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +resource "google_bigquery_table" "irs_990_ez_2014" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_ez_2014" + + description = "irs_990_ez_2014 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_ez_2014-table_id" { + value = google_bigquery_table.irs_990_ez_2014.table_id +} + +output "bigquery_table-irs_990_ez_2014-id" { + value = google_bigquery_table.irs_990_ez_2014.id +} diff --git a/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf new file mode 100644 index 000000000..3c50c8715 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_ez_2015" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_ez_2015" + + description = "irs_990_ez_2015 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_ez_2015-table_id" { + value = google_bigquery_table.irs_990_ez_2015.table_id +} + +output "bigquery_table-irs_990_ez_2015-id" { + value = google_bigquery_table.irs_990_ez_2015.id +} diff --git a/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf new file mode 100644 index 000000000..46b95047f --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_ez_2016" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_ez_2016" + + description = "irs_990_ez_2016 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_ez_2016-table_id" { + value = google_bigquery_table.irs_990_ez_2016.table_id +} + +output "bigquery_table-irs_990_ez_2016-id" { + value = google_bigquery_table.irs_990_ez_2016.id +} diff --git a/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf new file mode 100644 index 000000000..74a660625 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_ez_2017" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_ez_2017" + + description = "irs_990_ez_2017 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_ez_2017-table_id" { + value = google_bigquery_table.irs_990_ez_2017.table_id +} + +output "bigquery_table-irs_990_ez_2017-id" { + value = google_bigquery_table.irs_990_ez_2017.id +} diff --git a/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf b/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf new file mode 100644 index 000000000..5cf796be4 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_pf_2014" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_pf_2014" + + description = "irs_990_pf_2014 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_pf_2014-table_id" { + value = google_bigquery_table.irs_990_pf_2014.table_id +} + +output "bigquery_table-irs_990_pf_2014-id" { + value = google_bigquery_table.irs_990_pf_2014.id +} diff --git a/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf b/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf new file mode 100644 index 000000000..eac0f9ae4 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_pf_2015" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_pf_2015" + + description = "irs_990_pf_2015 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_pf_2015-table_id" { + value = google_bigquery_table.irs_990_pf_2015.table_id +} + +output "bigquery_table-irs_990_pf_2015-id" { + value = google_bigquery_table.irs_990_pf_2015.id +} diff --git a/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf b/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf new file mode 100644 index 000000000..e1b45d250 --- /dev/null +++ b/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +resource "google_bigquery_table" "irs_990_pf_2016" { + project = var.project_id + dataset_id = "irs_990" + table_id = "irs_990_pf_2016" + + description = "irs_990_pf_2016 dataset" + + + + + depends_on = [ + google_bigquery_dataset.irs_990 + ] +} + +output "bigquery_table-irs_990_pf_2016-table_id" { + value = google_bigquery_table.irs_990_pf_2016.table_id +} + +output "bigquery_table-irs_990_pf_2016-id" { + value = google_bigquery_table.irs_990_pf_2016.id +} diff --git a/datasets/irs_990/_terraform/provider.tf b/datasets/irs_990/_terraform/provider.tf new file mode 100644 index 000000000..23ab87dcd --- /dev/null +++ b/datasets/irs_990/_terraform/provider.tf @@ -0,0 +1,28 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +provider "google" { + project = var.project_id + impersonate_service_account = var.impersonating_acct + region = var.region +} + +data "google_client_openid_userinfo" "me" {} + +output "impersonating-account" { + value = data.google_client_openid_userinfo.me.email +} diff --git a/datasets/irs_990/_terraform/variables.tf b/datasets/irs_990/_terraform/variables.tf new file mode 100644 index 000000000..c3ec7c506 --- /dev/null +++ b/datasets/irs_990/_terraform/variables.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +variable "project_id" {} +variable "bucket_name_prefix" {} +variable "impersonating_acct" {} +variable "region" {} +variable "env" {} + diff --git a/datasets/irs_990/dataset.yaml b/datasets/irs_990/dataset.yaml new file mode 100644 index 000000000..fb1e34f92 --- /dev/null +++ b/datasets/irs_990/dataset.yaml @@ -0,0 +1,58 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dataset: + # The `dataset` block includes properties for your dataset that will be shown + # to users of your data on the Google Cloud website. + + # Must be exactly the same name as the folder name your dataset.yaml is in. + name: irs_990 + + # A friendly, human-readable name of the dataset + friendly_name: irs_990 + + # A short, descriptive summary of the dataset. + description: irs_990 based 2015 datasets + + # A list of sources the dataset is derived from, using the YAML list syntax. + dataset_sources: ~ + + # A list of terms and conditions that users of the dataset should agree on, + # using the YAML list syntax. 
+ terms_of_use: ~ + + +resources: + # A list of Google Cloud resources needed by your dataset. In principle, all + # pipelines under a dataset should be able to share these resources. + # + # The currently supported resources are shown below. Use only the resources + # you need, and delete the rest as needed by your pipeline. + # + # We will keep adding to the list below to support more Google Cloud resources + # over time. If a resource you need isn't supported, please file an issue on + # the repository. + + - type: bigquery_dataset + # Google BigQuery dataset to namespace all tables managed by this folder + # + # Required Properties: + # dataset_id + # + # Optional Properties: + # friendly_name (A user-friendly name of the dataset) + # description (A user-friendly description of the dataset) + # location (The geographic location where the dataset should reside) + dataset_id: irs_990 + description: irs_990 diff --git a/datasets/irs_990/irs_990_2014/irs_990_2014_dag.py b/datasets/irs_990/irs_990_2014/irs_990_2014_dag.py new file mode 100644 index 000000000..b9fbb59fb --- /dev/null +++ b/datasets/irs_990/irs_990_2014/irs_990_2014_dag.py @@ -0,0 +1,314 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Airflow DAG for the `irs_990_2014` pipeline of the `irs_990` dataset.
#
# The DAG has two tasks that run in sequence:
#   1. `irs_990_transform_csv` starts a Kubernetes pod from the dataset's
#      `run_csv_transform_kub` image and hands it its inputs as environment
#      variables (source URL, local file names, target bucket/path, the output
#      column order and the column rename map).
#   2. `load_irs_990_to_bq` loads the CSV found at the target GCS path into the
#      BigQuery table `irs_990.irs_990_2014`, replacing its previous contents.
#
# NOTE(review): what the container does with the environment variables is
# defined by the image, not by this file -- presumably download, rename/reorder
# columns and upload to GCS; confirm against csv_transform.py.

from airflow import DAG
from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator

# Defaults applied to every task in this DAG.
default_args = {
    "owner": "Google",
    "depends_on_past": False,
    "start_date": "2021-03-01",
}


# One active run at a time, scheduled daily, with no backfill of missed
# intervals (catchup=False).
with DAG(
    dag_id="irs_990.irs_990_2014",
    default_args=default_args,
    max_active_runs=1,
    schedule_interval="@daily",
    catchup=False,
    default_view="graph",
) as dag:

    # Run CSV transform within kubernetes pod
    irs_990_transform_csv = kubernetes_pod_operator.KubernetesPodOperator(
        task_id="irs_990_transform_csv",
        startup_timeout_seconds=600,
        name="irs_990_2014",
        namespace="default",
        image_pull_policy="Always",
        image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}",
        env_vars={
            "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990.zip",
            "SOURCE_FILE": "files/data.zip",
            "TARGET_FILE": "files/data_output.csv",
            "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}",
            # Must match `source_objects` of the BigQuery load task below.
            "TARGET_GCS_PATH": "data/irs_990/irs_990_2014/data_output.csv",
            # Was "irs_990_2015" (copy-paste from the 2015 pipeline); every other
            # identifier in this DAG refers to 2014.
            # NOTE(review): confirm csv_transform.py does not special-case the
            # old value before deploying.
            "PIPELINE_NAME": "irs_990_2014",
            # JSON list: the output column order; it mirrors `schema_fields` of
            # the load task below.
            "CSV_HEADERS": '["ein","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]',
            # JSON object: column rename map. The values are the names used in
            # CSV_HEADERS; the keys are presumably the raw column names of the
            # IRS extract (e.g. "s50Yc3or4947aYcd") -- verify against the
            # source file before editing them.
            "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}',
        },
        resources={"request_memory": "2G", "request_cpu": "1"},
    )

    # Task to load CSV data to a BigQuery table
    load_irs_990_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
        task_id="load_irs_990_to_bq",
        bucket="{{ var.json.shared.composer_bucket }}",
        source_objects=["data/irs_990/irs_990_2014/data_output.csv"],
        source_format="CSV",
        destination_project_dataset_table="irs_990.irs_990_2014",
        # The first row of the CSV is skipped on load.
        skip_leading_rows=1,
        # Every run replaces the table contents instead of appending.
        write_disposition="WRITE_TRUNCATE",
        # Explicit table schema, listed in the same order as CSV_HEADERS above.
        schema_fields=[
            {"name": "ein", "type": "string", "mode": "required"},
            {"name": "tax_pd", "type": "integer", "mode": "nullable"},
            {"name": "subseccd", "type": "integer", "mode": "nullable"},
            {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"},
            {"name": "schdbind", "type": "string", "mode": "nullable"},
            {"name": "politicalactvtscd", "type": "string", "mode": "nullable"},
            {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"},
            {"name": "subjto6033cd", "type": "string", "mode": "nullable"},
            {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"},
            {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"},
            {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"},
            {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"},
            {"name": "hldassetsintermpermcd", "type": "string", "mode": "nullable"},
            {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"},
            {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"},
            {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"},
            {"name": "rptothasstcd", "type": "string", "mode": "nullable"},
            {"name": "rptothliabcd", "type": "string", "mode": "nullable"},
            {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"},
            {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"},
            {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"},
            {"name": "operateschools170cd", "type": "string", "mode": "nullable"},
            {"name": "frgnofficecd", "type": "string", "mode": "nullable"},
            {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"},
            {"name": "frgngrntscd", "type": "string", "mode": "nullable"},
            {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"},
            {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"},
            {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"},
            {"name": "rptincgamingcd", "type": "string", "mode": "nullable"},
            {"name": "operatehosptlcd", "type": "string", "mode": "nullable"},
            {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"},
            {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"},
            {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"},
            {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"},
            {"name": "txexmptbndcd", "type": "string", "mode": "nullable"},
            {"name": "invstproceedscd", "type": "string", "mode": "nullable"},
            {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"},
            {"name": "actonbehalfcd", "type": "string", "mode": "nullable"},
            {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"},
            {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"},
            {"name": "loantofficercd", "type": "string", "mode": "nullable"},
            {"name": "grantoofficercd", "type": "string", "mode": "nullable"},
            {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"},
            {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"},
            {"name": "servasofficercd", "type": "string", "mode": "nullable"},
            {"name": "recvnoncashcd", "type": "string", "mode": "nullable"},
            {"name": "recvartcd", "type": "string", "mode": "nullable"},
            {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"},
            {"name": "sellorexchcd", "type": "string", "mode": "nullable"},
            {"name": "ownsepentcd", "type": "string", "mode": "nullable"},
            {"name": "reltdorgcd", "type": "string", "mode": "nullable"},
            {"name": "intincntrlcd", "type": "string", "mode": "nullable"},
            {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"},
            {"name": "conduct5percentcd", "type": "string", "mode": "nullable"},
            {"name": "compltschocd", "type": "string", "mode": "nullable"},
            {"name": "f1096cnt", "type": "integer", "mode": "nullable"},
            {"name": "fw2gcnt", "type": "integer", "mode": "nullable"},
            {"name": "wthldngrulescd", "type": "string", "mode": "nullable"},
            {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"},
            {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"},
            {"name": "unrelbusinccd", "type": "string", "mode": "nullable"},
            {"name": "filedf990tcd", "type": "string", "mode": "nullable"},
            {"name": "frgnacctcd", "type": "string", "mode": "nullable"},
            {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"},
            {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"},
            {"name": "filedf8886tcd", "type": "string", "mode": "nullable"},
            {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"},
            {"name": "exprstmntcd", "type": "string", "mode": "nullable"},
            {"name": "providegoodscd", "type": "string", "mode": "nullable"},
            {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"},
            {"name": "filedf8282cd", "type": "string", "mode": "nullable"},
            {"name": "f8282cnt", "type": "integer", "mode": "nullable"},
            {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"},
            {"name": "premiumspaidcd", "type": "string", "mode": "nullable"},
            {"name": "filedf8899cd", "type": "string", "mode": "nullable"},
            {"name": "filedf1098ccd", "type": "string", "mode": "nullable"},
            {"name": "excbushldngscd", "type": "string", "mode": "nullable"},
            {"name": "s4966distribcd", "type": "string", "mode": "nullable"},
            {"name": "distribtodonorcd", "type": "string", "mode": "nullable"},
            {"name": "initiationfees", "type": "integer", "mode": "nullable"},
            {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"},
            {"name": "grsincmembers", "type": "integer", "mode": "nullable"},
            {"name": "grsincother", "type": "integer", "mode": "nullable"},
            {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"},
            {"name": "txexmptint", "type": "integer", "mode": "nullable"},
            {"name": "qualhlthplncd", "type": "string", "mode": "nullable"},
            {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"},
            {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"},
            {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"},
            {"name": "filedf720cd", "type": "string", "mode": "nullable"},
            {"name": "totreprtabled", "type": "integer", "mode": "nullable"},
            {"name": "totcomprelatede", "type": "integer", "mode": "nullable"},
            {"name": "totestcompf", "type": "integer", "mode": "nullable"},
            {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"},
            {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"},
            {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"},
            {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"},
            {"name": "totrev2acola", "type": "integer", "mode": "nullable"},
            {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"},
            {"name": "totrev2bcola", "type": "integer", "mode": "nullable"},
            {"name": "prgmservcode2ccd", "type": "integer", "mode": "nullable"},
            {"name": "totrev2ccola", "type": "integer", "mode": "nullable"},
            {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"},
            {"name": "totrev2dcola", "type": "integer", "mode": "nullable"},
            {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"},
            {"name": "totrev2ecola", "type": "integer", "mode": "nullable"},
            {"name": "totrev2fcola", "type": "integer", "mode": "nullable"},
            {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"},
            {"name": "invstmntinc", "type": "integer", "mode": "nullable"},
            {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"},
            {"name": "royaltsinc", "type": "integer", "mode": "nullable"},
            {"name": "grsrntsreal", "type": "integer", "mode": "nullable"},
            {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"},
            {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"},
            {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"},
            {"name": "rntlincreal", "type": "integer", "mode": "nullable"},
            {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"},
            {"name": "netrntlinc", "type": "integer", "mode": "nullable"},
            {"name": "grsalesecur", "type": "integer", "mode": "nullable"},
            {"name": "grsalesothr", "type": "integer", "mode": "nullable"},
            {"name": "cstbasisecur", "type": "integer", "mode": "nullable"},
            {"name": "cstbasisothr", "type": "integer", "mode": "nullable"},
            {"name": "gnlsecur", "type": "integer", "mode": "nullable"},
            {"name": "gnlsothr", "type": "integer", "mode": "nullable"},
            {"name": "netgnls", "type": "integer", "mode": "nullable"},
            {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"},
            {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"},
            {"name": "netincfndrsng", "type": "integer", "mode": "nullable"},
            {"name": "grsincgaming", "type": "integer", "mode": "nullable"},
            {"name": "lessdirgaming", "type": "integer", "mode": "nullable"},
            {"name": "netincgaming", "type": "integer", "mode": "nullable"},
            {"name": "grsalesinvent", "type": "integer", "mode": "nullable"},
            {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"},
            {"name": "netincsales", "type": "integer", "mode": "nullable"},
            {"name": "miscrev11acd", "type": "integer", "mode": "nullable"},
            {"name": "miscrevtota", "type": "integer", "mode": "nullable"},
            {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"},
            {"name": "miscrevtot11b", "type": "integer",  "mode": "nullable"},
            {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"},
            {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"},
            {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"},
            {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"},
            {"name": "totrevenue", "type": "integer", "mode": "nullable"},
            {"name": "grntstogovt", "type": "integer", "mode": "nullable"},
            {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"},
            {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"},
            {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"},
            {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"},
            {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"},
            {"name": "othrsalwages", "type": "integer", "mode": "nullable"},
            {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"},
            {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"},
            {"name": "payrolltx", "type": "integer", "mode": "nullable"},
            {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"},
            {"name": "legalfees", "type": "integer", "mode": "nullable"},
            {"name": "accntingfees", "type": "integer", "mode": "nullable"},
            {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"},
            {"name": "profndraising", "type": "integer", "mode": "nullable"},
            {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"},
            {"name": "feesforsrvcothr", "type": "integer", "mode": "nullable"},
            {"name": "advrtpromo", "type": "integer", "mode": "nullable"},
            {"name": "officexpns", "type": "integer", "mode": "nullable"},
            {"name": "infotech", "type": "integer", "mode": "nullable"},
            {"name": "royaltsexpns", "type": "integer", "mode": "nullable"},
            {"name": "occupancy", "type": "integer", "mode": "nullable"},
            {"name": "travel", "type": "integer", "mode": "nullable"},
            {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"},
            {"name": "converconventmtng", "type": "integer", "mode": "nullable"},
            {"name": "interestamt", "type": "integer", "mode": "nullable"},
            {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"},
            {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"},
            {"name": "insurance", "type": "integer", "mode": "nullable"},
            {"name": "othrexpnsa", "type": "integer", "mode": "nullable"},
            {"name": "othrexpnsb", "type": "integer", "mode": "nullable"},
            {"name": "othrexpnsc", "type": "integer", "mode": "nullable"},
            {"name": "othrexpnsd", "type": "integer", "mode": "nullable"},
            {"name": "othrexpnse", "type": "integer", "mode": "nullable"},
            {"name": "othrexpnsf", "type": "integer", "mode": "nullable"},
            {"name": "totfuncexpns", "type": "integer", "mode": "nullable"},
            {"name": "nonintcashend", "type": "integer", "mode": "nullable"},
            {"name": "svngstempinvend", "type": "integer", "mode": "nullable"},
            {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"},
            {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"},
            {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"},
            {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"},
            {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"},
            {"name": "invntriesalesend", "type": "integer", "mode": "nullable"},
            {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"},
            {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"},
            {"name": "invstmntsend", "type": "integer", "mode": "nullable"},
            {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"},
            {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"},
            {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"},
            {"name": "othrassetsend", "type": "integer", "mode": "nullable"},
            {"name": "totassetsend", "type": "integer", "mode": "nullable"},
            {"name": "accntspayableend", "type": "integer", "mode": "nullable"},
            {"name": "grntspayableend", "type": "integer", "mode": "nullable"},
            {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"},
            {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"},
            {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"},
            {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"},
            {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"},
            {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"},
            {"name": "othrliabend", "type": "integer", "mode": "nullable"},
            {"name": "totliabend", "type": "integer", "mode": "nullable"},
            {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"},
            {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"},
            {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"},
            {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"},
            {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"},
            {"name": "retainedearnend", "type": "integer", "mode": "nullable"},
            {"name": "totnetassetend", "type": "integer", "mode": "nullable"},
            {"name": "totnetliabastend", "type": "integer", "mode": "nullable"},
            {"name": "nonpfrea", "type": "integer", "mode": "nullable"},
            {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"},
            {"name": "totsupport", "type": "integer", "mode": "nullable"},
            {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"},
            {"name": "txrevnuelevied170", "type": "integer", "mode": "nullable"},
            {"name": "srvcsval170", "type": "integer", "mode": "nullable"},
            {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"},
            {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"},
            {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"},
            {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"},
            {"name": "grsinc170", "type": "integer", "mode": "nullable"},
            {"name": "netincunreltd170", "type": "integer", "mode": "nullable"},
            {"name": "othrinc170", "type": "integer", "mode": "nullable"},
            {"name": "totsupp170", "type": "integer", "mode": "nullable"},
            {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"},
            {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"},
            {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"},
            {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"},
            {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"},
            {"name": "srvcsval509", "type": "integer", "mode": "nullable"},
            {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"},
            {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"},
            {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"},
            {"name": "subtotpub509", "type": "integer", "mode": "nullable"},
            {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"},
            {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"},
            {"name": "grsinc509", "type": "integer", "mode": "nullable"},
            {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"},
            {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"},
            {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"},
            {"name": "othrinc509", "type": "integer", "mode": "nullable"},
            {"name": "totsupp509", "type": "integer", "mode": "nullable"},
        ],
    )

    # The load may only start once the transformed CSV has been produced.
    irs_990_transform_csv >> load_irs_990_to_bq
b/datasets/irs_990/irs_990_2014/pipeline.yaml new file mode 100644 index 000000000..25bffea7a --- /dev/null +++ b/datasets/irs_990/irs_990_2014/pipeline.yaml @@ -0,0 +1,846 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2014 + + # Description of the table + description: "irs_990 2014 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2014 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" # runs once a day at midnight (Airflow's default timezone is UTC) + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2014" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. 
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/14eofinextract990.zip" + SOURCE_FILE: "files/data.zip" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2014/data_output.csv" + PIPELINE_NAME: "irs_990_2014" + CSV_HEADERS: >- + ["ein","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd"
,"f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grn
tspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2014/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2014" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name : "ein" + type : "string" + mode : "required" + - name : "tax_pd" + type : "integer" + mode : "nullable" + - name : "subseccd" + type : "integer" + mode : "nullable" + - name : "s501c3or4947a1cd" + type : "string" + mode : "nullable" + - name : "schdbind" + type : "string" + mode : "nullable" + - name : "politicalactvtscd" + type : "string" + mode : "nullable" + - name : "lbbyingactvtscd" + type : "string" + mode : "nullable" + - name : "subjto6033cd" + type : "string" + mode : "nullable" + - name : "dnradvisedfundscd" + type : "string" + mode : "nullable" + - name : "prptyintrcvdcd" + type : "string" + mode : "nullable" + - name : "maintwrkofartcd" + type : "string" + mode : "nullable" + - name : "crcounselingqstncd" + type : "string" + mode : "nullable" + - name : "hldassetsintermpermcd" + type : "string" + mode : "nullable" + - name : "rptlndbldgeqptcd" + type : "string" + mode : "nullable" + - name : "rptinvstothsecd" + type : "string" + mode : "nullable" + - name : "rptinvstprgrelcd" + type : "string" + mode : "nullable" + - name : "rptothasstcd" + type : "string" + mode : "nullable" + - name : "rptothliabcd" + type : "string" + mode : "nullable" + - name : 
"sepcnsldtfinstmtcd" + type : "string" + mode : "nullable" + - name : "sepindaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "inclinfinstmtcd" + type : "string" + mode : "nullable" + - name : "operateschools170cd" + type : "string" + mode : "nullable" + - name : "frgnofficecd" + type : "string" + mode : "nullable" + - name : "frgnrevexpnscd" + type : "string" + mode : "nullable" + - name : "frgngrntscd" + type : "string" + mode : "nullable" + - name : "frgnaggragrntscd" + type : "string" + mode : "nullable" + - name : "rptprofndrsngfeescd" + type : "string" + mode : "nullable" + - name : "rptincfnndrsngcd" + type : "string" + mode : "nullable" + - name : "rptincgamingcd" + type : "string" + mode : "nullable" + - name : "operatehosptlcd" + type : "string" + mode : "nullable" + - name : "hospaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstogovtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstoindvcd" + type : "string" + mode : "nullable" + - name : "rptyestocompnstncd" + type : "string" + mode : "nullable" + - name : "txexmptbndcd" + type : "string" + mode : "nullable" + - name : "invstproceedscd" + type : "string" + mode : "nullable" + - name : "maintescrwaccntcd" + type : "string" + mode : "nullable" + - name : "actonbehalfcd" + type : "string" + mode : "nullable" + - name : "engageexcessbnftcd" + type : "string" + mode : "nullable" + - name : "awarexcessbnftcd" + type : "string" + mode : "nullable" + - name : "loantofficercd" + type : "string" + mode : "nullable" + - name : "grantoofficercd" + type : "string" + mode : "nullable" + - name : "dirbusnreltdcd" + type : "string" + mode : "nullable" + - name : "fmlybusnreltdcd" + type : "string" + mode : "nullable" + - name : "servasofficercd" + type : "string" + mode : "nullable" + - name : "recvnoncashcd" + type : "string" + mode : "nullable" + - name : "recvartcd" + type : "string" + mode : "nullable" + - name : "ceaseoperationscd" + type : "string" + mode : 
"nullable" + - name : "sellorexchcd" + type : "string" + mode : "nullable" + - name : "ownsepentcd" + type : "string" + mode : "nullable" + - name : "reltdorgcd" + type : "string" + mode : "nullable" + - name : "intincntrlcd" + type : "string" + mode : "nullable" + - name : "orgtrnsfrcd" + type : "string" + mode : "nullable" + - name : "conduct5percentcd" + type : "string" + mode : "nullable" + - name : "compltschocd" + type : "string" + mode : "nullable" + - name : "f1096cnt" + type : "integer" + mode : "nullable" + - name : "fw2gcnt" + type : "integer" + mode : "nullable" + - name : "wthldngrulescd" + type : "string" + mode : "nullable" + - name : "noemplyeesw3cnt" + type : "integer" + mode : "nullable" + - name : "filerqrdrtnscd" + type : "string" + mode : "nullable" + - name : "unrelbusinccd" + type : "string" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + mode : "nullable" + - name : "frgnacctcd" + type : "string" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + mode : "nullable" + - name : "prtynotifyorgcd" + type : "string" + mode : "nullable" + - name : "filedf8886tcd" + type : "string" + mode : "nullable" + - name : "solicitcntrbcd" + type : "string" + mode : "nullable" + - name : "exprstmntcd" + type : "string" + mode : "nullable" + - name : "providegoodscd" + type : "string" + mode : "nullable" + - name : "notfydnrvalcd" + type : "string" + mode : "nullable" + - name : "filedf8282cd" + type : "string" + mode : "nullable" + - name : "f8282cnt" + type : "integer" + mode : "nullable" + - name : "fndsrcvdcd" + type : "string" + mode : "nullable" + - name : "premiumspaidcd" + type : "string" + mode : "nullable" + - name : "filedf8899cd" + type : "string" + mode : "nullable" + - name : "filedf1098ccd" + type : "string" + mode : "nullable" + - name : "excbushldngscd" + type : "string" + mode : "nullable" + - name : "s4966distribcd" + type : "string" + mode : "nullable" + - name : "distribtodonorcd" + type : "string" + 
mode : "nullable" + - name : "initiationfees" + type : "integer" + mode : "nullable" + - name : "grsrcptspublicuse" + type : "integer" + mode : "nullable" + - name : "grsincmembers" + type : "integer" + mode : "nullable" + - name : "grsincother" + type : "integer" + mode : "nullable" + - name : "filedlieuf1041cd" + type : "string" + mode : "nullable" + - name : "txexmptint" + type : "integer" + mode : "nullable" + - name : "qualhlthplncd" + type : "string" + mode : "nullable" + - name : "qualhlthreqmntn" + type : "integer" + mode : "nullable" + - name : "qualhlthonhnd" + type : "integer" + mode : "nullable" + - name : "rcvdpdtngcd" + type : "string" + mode : "nullable" + - name : "filedf720cd" + type : "string" + mode : "nullable" + - name : "totreprtabled" + type : "integer" + mode : "nullable" + - name : "totcomprelatede" + type : "integer" + mode : "nullable" + - name : "totestcompf" + type : "integer" + mode : "nullable" + - name : "noindiv100kcnt" + type : "integer" + mode : "nullable" + - name : "nocontractor100kcnt" + type : "integer" + mode : "nullable" + - name : "totcntrbgfts" + type : "integer" + mode : "nullable" + - name : "prgmservcode2acd" + type : "integer" + mode : "nullable" + - name : "totrev2acola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2bcd" + type : "integer" + mode : "nullable" + - name : "totrev2bcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ccd" + type : "integer" + mode : "nullable" + - name : "totrev2ccola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2dcd" + type : "integer" + mode : "nullable" + - name : "totrev2dcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ecd" + type : "integer" + mode : "nullable" + - name : "totrev2ecola" + type : "integer" + mode : "nullable" + - name : "totrev2fcola" + type : "integer" + mode : "nullable" + - name : "totprgmrevnue" + type : "integer" + mode : "nullable" + - name : "invstmntinc" + type : "integer" + mode : 
"nullable" + - name : "txexmptbndsproceeds" + type : "integer" + mode : "nullable" + - name : "royaltsinc" + type : "integer" + mode : "nullable" + - name : "grsrntsreal" + type : "integer" + mode : "nullable" + - name : "grsrntsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlexpnsreal" + type : "integer" + mode : "nullable" + - name : "rntlexpnsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlincreal" + type : "integer" + mode : "nullable" + - name : "rntlincprsnl" + type : "integer" + mode : "nullable" + - name : "netrntlinc" + type : "integer" + mode : "nullable" + - name : "grsalesecur" + type : "integer" + mode : "nullable" + - name : "grsalesothr" + type : "integer" + mode : "nullable" + - name : "cstbasisecur" + type : "integer" + mode : "nullable" + - name : "cstbasisothr" + type : "integer" + mode : "nullable" + - name : "gnlsecur" + type : "integer" + mode : "nullable" + - name : "gnlsothr" + type : "integer" + mode : "nullable" + - name : "netgnls" + type : "integer" + mode : "nullable" + - name : "grsincfndrsng" + type : "integer" + mode : "nullable" + - name : "lessdirfndrsng" + type : "integer" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + mode : "nullable" + - name : "lessdirgaming" + type : "integer" + mode : "nullable" + - name : "netincgaming" + type : "integer" + mode : "nullable" + - name : "grsalesinvent" + type : "integer" + mode : "nullable" + - name : "lesscstofgoods" + type : "integer" + mode : "nullable" + - name : "netincsales" + type : "integer" + mode : "nullable" + - name : "miscrev11acd" + type : "integer" + mode : "nullable" + - name : "miscrevtota" + type : "integer" + mode : "nullable" + - name : "miscrev11bcd" + type : "integer" + mode : "nullable" + - name : "miscrevtot11b" + type : "integer" + mode : "nullable" + - name : "miscrev11ccd" + type : "integer" + mode : "nullable" + - name : "miscrevtot11c" + type : 
"integer" + mode : "nullable" + - name : "miscrevtot11d" + type : "integer" + mode : "nullable" + - name : "miscrevtot11e" + type : "integer" + mode : "nullable" + - name : "totrevenue" + type : "integer" + mode : "nullable" + - name : "grntstogovt" + type : "integer" + mode : "nullable" + - name : "grnsttoindiv" + type : "integer" + mode : "nullable" + - name : "grntstofrgngovt" + type : "integer" + mode : "nullable" + - name : "benifitsmembrs" + type : "integer" + mode : "nullable" + - name : "compnsatncurrofcr" + type : "integer" + mode : "nullable" + - name : "compnsatnandothr" + type : "integer" + mode : "nullable" + - name : "othrsalwages" + type : "integer" + mode : "nullable" + - name : "pensionplancontrb" + type : "integer" + mode : "nullable" + - name : "othremplyeebenef" + type : "integer" + mode : "nullable" + - name : "payrolltx" + type : "integer" + mode : "nullable" + - name : "feesforsrvcmgmt" + type : "integer" + mode : "nullable" + - name : "legalfees" + type : "integer" + mode : "nullable" + - name : "accntingfees" + type : "integer" + mode : "nullable" + - name : "feesforsrvclobby" + type : "integer" + mode : "nullable" + - name : "profndraising" + type : "integer" + mode : "nullable" + - name : "feesforsrvcinvstmgmt" + type : "integer" + mode : "nullable" + - name : "feesforsrvcothr" + type : "integer" + mode : "nullable" + - name : "advrtpromo" + type : "integer" + mode : "nullable" + - name : "officexpns" + type : "integer" + mode : "nullable" + - name : "infotech" + type : "integer" + mode : "nullable" + - name : "royaltsexpns" + type : "integer" + mode : "nullable" + - name : "occupancy" + type : "integer" + mode : "nullable" + - name : "travel" + type : "integer" + mode : "nullable" + - name : "travelofpublicoffcl" + type : "integer" + mode : "nullable" + - name : "converconventmtng" + type : "integer" + mode : "nullable" + - name : "interestamt" + type : "integer" + mode : "nullable" + - name : "pymtoaffiliates" + type : "integer" + mode 
: "nullable" + - name : "deprcatndepletn" + type : "integer" + mode : "nullable" + - name : "insurance" + type : "integer" + mode : "nullable" + - name : "othrexpnsa" + type : "integer" + mode : "nullable" + - name : "othrexpnsb" + type : "integer" + mode : "nullable" + - name : "othrexpnsc" + type : "integer" + mode : "nullable" + - name : "othrexpnsd" + type : "integer" + mode : "nullable" + - name : "othrexpnse" + type : "integer" + mode : "nullable" + - name : "othrexpnsf" + type : "integer" + mode : "nullable" + - name : "totfuncexpns" + type : "integer" + mode : "nullable" + - name : "nonintcashend" + type : "integer" + mode : "nullable" + - name : "svngstempinvend" + type : "integer" + mode : "nullable" + - name : "pldgegrntrcvblend" + type : "integer" + mode : "nullable" + - name : "accntsrcvblend" + type : "integer" + mode : "nullable" + - name : "currfrmrcvblend" + type : "integer" + mode : "nullable" + - name : "rcvbldisqualend" + type : "integer" + mode : "nullable" + - name : "notesloansrcvblend" + type : "integer" + mode : "nullable" + - name : "invntriesalesend" + type : "integer" + mode : "nullable" + - name : "prepaidexpnsend" + type : "integer" + mode : "nullable" + - name : "lndbldgsequipend" + type : "integer" + mode : "nullable" + - name : "invstmntsend" + type : "integer" + mode : "nullable" + - name : "invstmntsothrend" + type : "integer" + mode : "nullable" + - name : "invstmntsprgmend" + type : "integer" + mode : "nullable" + - name : "intangibleassetsend" + type : "integer" + mode : "nullable" + - name : "othrassetsend" + type : "integer" + mode : "nullable" + - name : "totassetsend" + type : "integer" + mode : "nullable" + - name : "accntspayableend" + type : "integer" + mode : "nullable" + - name : "grntspayableend" + type : "integer" + mode : "nullable" + - name : "deferedrevnuend" + type : "integer" + mode : "nullable" + - name : "txexmptbndsend" + type : "integer" + mode : "nullable" + - name : "escrwaccntliabend" + type : "integer" + 
mode : "nullable" + - name : "paybletoffcrsend" + type : "integer" + mode : "nullable" + - name : "secrdmrtgsend" + type : "integer" + mode : "nullable" + - name : "unsecurednotesend" + type : "integer" + mode : "nullable" + - name : "othrliabend" + type : "integer" + mode : "nullable" + - name : "totliabend" + type : "integer" + mode : "nullable" + - name : "unrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "temprstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "permrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "capitalstktrstend" + type : "integer" + mode : "nullable" + - name : "paidinsurplusend" + type : "integer" + mode : "nullable" + - name : "retainedearnend" + type : "integer" + mode : "nullable" + - name : "totnetassetend" + type : "integer" + mode : "nullable" + - name : "totnetliabastend" + type : "integer" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + mode : "nullable" + - name : "totsupport" + type : "integer" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "grsinc170" + type : "integer" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + mode : "nullable" + - name : "othrinc170" + type : "integer" + mode : "nullable" + - name : "totsupp170" + type : "integer" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + mode : "nullable" + - name : "totgftgrntrcvd509" + type : "integer" + mode : "nullable" + - name : 
"grsrcptsadmissn509" + type : "integer" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "grsinc509" + type : "integer" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + mode : "nullable" + - name : "othrinc509" + type : "integer" + mode : "nullable" + - name : "totsupp509" + type : "integer" + mode : "nullable" + + graph_paths: + - "irs_990_transform_csv >> load_irs_990_to_bq" \ No newline at end of file diff --git a/datasets/irs_990/irs_990_2015/irs_990_2015_dag.py b/datasets/irs_990/irs_990_2015/irs_990_2015_dag.py new file mode 100644 index 000000000..84abe3e3b --- /dev/null +++ b/datasets/irs_990/irs_990_2015/irs_990_2015_dag.py @@ -0,0 +1,315 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2015", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2015", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextract990.dat.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2015/data_output.csv", + "PIPELINE_NAME": "irs_990_2015", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasis
ecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unrel
txincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2015/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2015", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "elf", "type": "string", "mode": "nullable"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", "mode": 
"nullable"}, + {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": 
"string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", 
"type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", 
"mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", 
"type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", "type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", 
"mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", "mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", 
"mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_transform_csv >> load_irs_990_to_bq diff --git 
a/datasets/irs_990/irs_990_2015/pipeline.yaml b/datasets/irs_990/irs_990_2015/pipeline.yaml new file mode 100644 index 000000000..2225651d1 --- /dev/null +++ b/datasets/irs_990/irs_990_2015/pipeline.yaml @@ -0,0 +1,847 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2015 + + # Description of the table + description: "irs_990 2015 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2015 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" # runs once a day at midnight (Airflow's default timezone is UTC) + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2015" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/15eofinextract990.dat.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2015/data_output.csv" + PIPELINE_NAME: "irs_990_2015" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","fil
edf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayabl
eend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2015/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2015" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + schema_fields: + - name : "ein" + type : "string" + mode : "required" + - name : "elf" + type : "string" + mode : "nullable" + - name : "tax_pd" + type : "integer" + mode : "nullable" + - name : "subseccd" + type : "integer" + mode : "nullable" + - name : "s501c3or4947a1cd" + type : "string" + mode : "nullable" + - name : "schdbind" + type : "string" + mode : "nullable" + - name : "politicalactvtscd" + type : "string" + mode : "nullable" + - name : "lbbyingactvtscd" + type : "string" + mode : "nullable" + - name : "subjto6033cd" + type : "string" + mode : "nullable" + - name : "dnradvisedfundscd" + type : "string" + mode : "nullable" + - name : "prptyintrcvdcd" + type : "string" + mode : "nullable" + - name : "maintwrkofartcd" + type : "string" + mode : "nullable" + - name : "crcounselingqstncd" + type : "string" + mode : "nullable" + - name : "hldassetsintermpermcd" + type : "string" + mode : "nullable" + - name : "rptlndbldgeqptcd" + type : "string" + mode : "nullable" + - name : "rptinvstothsecd" + type : "string" + mode : "nullable" + - name : "rptinvstprgrelcd" + type : "string" + mode : "nullable" + - name : "rptothasstcd" + type : "string" + mode : "nullable" + - name : "rptothliabcd" + type : 
"string" + mode : "nullable" + - name : "sepcnsldtfinstmtcd" + type : "string" + mode : "nullable" + - name : "sepindaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "inclinfinstmtcd" + type : "string" + mode : "nullable" + - name : "operateschools170cd" + type : "string" + mode : "nullable" + - name : "frgnofficecd" + type : "string" + mode : "nullable" + - name : "frgnrevexpnscd" + type : "string" + mode : "nullable" + - name : "frgngrntscd" + type : "string" + mode : "nullable" + - name : "frgnaggragrntscd" + type : "string" + mode : "nullable" + - name : "rptprofndrsngfeescd" + type : "string" + mode : "nullable" + - name : "rptincfnndrsngcd" + type : "string" + mode : "nullable" + - name : "rptincgamingcd" + type : "string" + mode : "nullable" + - name : "operatehosptlcd" + type : "string" + mode : "nullable" + - name : "hospaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstogovtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstoindvcd" + type : "string" + mode : "nullable" + - name : "rptyestocompnstncd" + type : "string" + mode : "nullable" + - name : "txexmptbndcd" + type : "string" + mode : "nullable" + - name : "invstproceedscd" + type : "string" + mode : "nullable" + - name : "maintescrwaccntcd" + type : "string" + mode : "nullable" + - name : "actonbehalfcd" + type : "string" + mode : "nullable" + - name : "engageexcessbnftcd" + type : "string" + mode : "nullable" + - name : "awarexcessbnftcd" + type : "string" + mode : "nullable" + - name : "loantofficercd" + type : "string" + mode : "nullable" + - name : "grantoofficercd" + type : "string" + mode : "nullable" + - name : "dirbusnreltdcd" + type : "string" + mode : "nullable" + - name : "fmlybusnreltdcd" + type : "string" + mode : "nullable" + - name : "servasofficercd" + type : "string" + mode : "nullable" + - name : "recvnoncashcd" + type : "string" + mode : "nullable" + - name : "recvartcd" + type : "string" + mode : "nullable" + - name : 
"ceaseoperationscd" + type : "string" + mode : "nullable" + - name : "sellorexchcd" + type : "string" + mode : "nullable" + - name : "ownsepentcd" + type : "string" + mode : "nullable" + - name : "reltdorgcd" + type : "string" + mode : "nullable" + - name : "intincntrlcd" + type : "string" + mode : "nullable" + - name : "orgtrnsfrcd" + type : "string" + mode : "nullable" + - name : "conduct5percentcd" + type : "string" + mode : "nullable" + - name : "compltschocd" + type : "string" + mode : "nullable" + - name : "f1096cnt" + type : "integer" + mode : "nullable" + - name : "fw2gcnt" + type : "integer" + mode : "nullable" + - name : "wthldngrulescd" + type : "string" + mode : "nullable" + - name : "noemplyeesw3cnt" + type : "integer" + mode : "nullable" + - name : "filerqrdrtnscd" + type : "string" + mode : "nullable" + - name : "unrelbusinccd" + type : "string" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + mode : "nullable" + - name : "frgnacctcd" + type : "string" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + mode : "nullable" + - name : "prtynotifyorgcd" + type : "string" + mode : "nullable" + - name : "filedf8886tcd" + type : "string" + mode : "nullable" + - name : "solicitcntrbcd" + type : "string" + mode : "nullable" + - name : "exprstmntcd" + type : "string" + mode : "nullable" + - name : "providegoodscd" + type : "string" + mode : "nullable" + - name : "notfydnrvalcd" + type : "string" + mode : "nullable" + - name : "filedf8282cd" + type : "string" + mode : "nullable" + - name : "f8282cnt" + type : "integer" + mode : "nullable" + - name : "fndsrcvdcd" + type : "string" + mode : "nullable" + - name : "premiumspaidcd" + type : "string" + mode : "nullable" + - name : "filedf8899cd" + type : "string" + mode : "nullable" + - name : "filedf1098ccd" + type : "string" + mode : "nullable" + - name : "excbushldngscd" + type : "string" + mode : "nullable" + - name : "s4966distribcd" + type : "string" + mode : "nullable" + - 
name : "distribtodonorcd" + type : "string" + mode : "nullable" + - name : "initiationfees" + type : "integer" + mode : "nullable" + - name : "grsrcptspublicuse" + type : "integer" + mode : "nullable" + - name : "grsincmembers" + type : "integer" + mode : "nullable" + - name : "grsincother" + type : "integer" + mode : "nullable" + - name : "filedlieuf1041cd" + type : "string" + mode : "nullable" + - name : "txexmptint" + type : "integer" + mode : "nullable" + - name : "qualhlthplncd" + type : "string" + mode : "nullable" + - name : "qualhlthreqmntn" + type : "integer" + mode : "nullable" + - name : "qualhlthonhnd" + type : "integer" + mode : "nullable" + - name : "rcvdpdtngcd" + type : "string" + mode : "nullable" + - name : "filedf720cd" + type : "string" + mode : "nullable" + - name : "totreprtabled" + type : "integer" + mode : "nullable" + - name : "totcomprelatede" + type : "integer" + mode : "nullable" + - name : "totestcompf" + type : "integer" + mode : "nullable" + - name : "noindiv100kcnt" + type : "integer" + mode : "nullable" + - name : "nocontractor100kcnt" + type : "integer" + mode : "nullable" + - name : "totcntrbgfts" + type : "integer" + mode : "nullable" + - name : "prgmservcode2acd" + type : "integer" + mode : "nullable" + - name : "totrev2acola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2bcd" + type : "integer" + mode : "nullable" + - name : "totrev2bcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ccd" + type : "integer" + mode : "nullable" + - name : "totrev2ccola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2dcd" + type : "integer" + mode : "nullable" + - name : "totrev2dcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ecd" + type : "integer" + mode : "nullable" + - name : "totrev2ecola" + type : "integer" + mode : "nullable" + - name : "totrev2fcola" + type : "integer" + mode : "nullable" + - name : "totprgmrevnue" + type : "integer" + mode : "nullable" + - name 
: "invstmntinc" + type : "integer" + mode : "nullable" + - name : "txexmptbndsproceeds" + type : "integer" + mode : "nullable" + - name : "royaltsinc" + type : "integer" + mode : "nullable" + - name : "grsrntsreal" + type : "integer" + mode : "nullable" + - name : "grsrntsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlexpnsreal" + type : "integer" + mode : "nullable" + - name : "rntlexpnsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlincreal" + type : "integer" + mode : "nullable" + - name : "rntlincprsnl" + type : "integer" + mode : "nullable" + - name : "netrntlinc" + type : "integer" + mode : "nullable" + - name : "grsalesecur" + type : "integer" + mode : "nullable" + - name : "grsalesothr" + type : "integer" + mode : "nullable" + - name : "cstbasisecur" + type : "integer" + mode : "nullable" + - name : "cstbasisothr" + type : "integer" + mode : "nullable" + - name : "gnlsecur" + type : "integer" + mode : "nullable" + - name : "gnlsothr" + type : "integer" + mode : "nullable" + - name : "netgnls" + type : "integer" + mode : "nullable" + - name : "grsincfndrsng" + type : "integer" + mode : "nullable" + - name : "lessdirfndrsng" + type : "integer" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + mode : "nullable" + - name : "lessdirgaming" + type : "integer" + mode : "nullable" + - name : "netincgaming" + type : "integer" + mode : "nullable" + - name : "grsalesinvent" + type : "integer" + mode : "nullable" + - name : "lesscstofgoods" + type : "integer" + mode : "nullable" + - name : "netincsales" + type : "integer" + mode : "nullable" + - name : "miscrev11acd" + type : "integer" + mode : "nullable" + - name : "miscrevtota" + type : "integer" + mode : "nullable" + - name : "miscrev11bcd" + type : "integer" + mode : "nullable" + - name : "miscrevtot11b" + type : "integer" + mode : "nullable" + - name : "miscrev11ccd" + type : "integer" + mode : 
"nullable" + - name : "miscrevtot11c" + type : "integer" + mode : "nullable" + - name : "miscrevtot11d" + type : "integer" + mode : "nullable" + - name : "miscrevtot11e" + type : "integer" + mode : "nullable" + - name : "totrevenue" + type : "integer" + mode : "nullable" + - name : "grntstogovt" + type : "integer" + mode : "nullable" + - name : "grnsttoindiv" + type : "integer" + mode : "nullable" + - name : "grntstofrgngovt" + type : "integer" + mode : "nullable" + - name : "benifitsmembrs" + type : "integer" + mode : "nullable" + - name : "compnsatncurrofcr" + type : "integer" + mode : "nullable" + - name : "compnsatnandothr" + type : "integer" + mode : "nullable" + - name : "othrsalwages" + type : "integer" + mode : "nullable" + - name : "pensionplancontrb" + type : "integer" + mode : "nullable" + - name : "othremplyeebenef" + type : "integer" + mode : "nullable" + - name : "payrolltx" + type : "integer" + mode : "nullable" + - name : "feesforsrvcmgmt" + type : "integer" + mode : "nullable" + - name : "legalfees" + type : "integer" + mode : "nullable" + - name : "accntingfees" + type : "integer" + mode : "nullable" + - name : "feesforsrvclobby" + type : "integer" + mode : "nullable" + - name : "profndraising" + type : "integer" + mode : "nullable" + - name : "feesforsrvcinvstmgmt" + type : "integer" + mode : "nullable" + - name : "feesforsrvcothr" + type : "integer" + mode : "nullable" + - name : "advrtpromo" + type : "integer" + mode : "nullable" + - name : "officexpns" + type : "integer" + mode : "nullable" + - name : "infotech" + type : "integer" + mode : "nullable" + - name : "royaltsexpns" + type : "integer" + mode : "nullable" + - name : "occupancy" + type : "integer" + mode : "nullable" + - name : "travel" + type : "integer" + mode : "nullable" + - name : "travelofpublicoffcl" + type : "integer" + mode : "nullable" + - name : "converconventmtng" + type : "integer" + mode : "nullable" + - name : "interestamt" + type : "integer" + mode : "nullable" + - name 
: "pymtoaffiliates" + type : "integer" + mode : "nullable" + - name : "deprcatndepletn" + type : "integer" + mode : "nullable" + - name : "insurance" + type : "integer" + mode : "nullable" + - name : "othrexpnsa" + type : "integer" + mode : "nullable" + - name : "othrexpnsb" + type : "integer" + mode : "nullable" + - name : "othrexpnsc" + type : "integer" + mode : "nullable" + - name : "othrexpnsd" + type : "integer" + mode : "nullable" + - name : "othrexpnse" + type : "integer" + mode : "nullable" + - name : "othrexpnsf" + type : "integer" + mode : "nullable" + - name : "totfuncexpns" + type : "integer" + mode : "nullable" + - name : "nonintcashend" + type : "integer" + mode : "nullable" + - name : "svngstempinvend" + type : "integer" + mode : "nullable" + - name : "pldgegrntrcvblend" + type : "integer" + mode : "nullable" + - name : "accntsrcvblend" + type : "integer" + mode : "nullable" + - name : "currfrmrcvblend" + type : "integer" + mode : "nullable" + - name : "rcvbldisqualend" + type : "integer" + mode : "nullable" + - name : "notesloansrcvblend" + type : "integer" + mode : "nullable" + - name : "invntriesalesend" + type : "integer" + mode : "nullable" + - name : "prepaidexpnsend" + type : "integer" + mode : "nullable" + - name : "lndbldgsequipend" + type : "integer" + mode : "nullable" + - name : "invstmntsend" + type : "integer" + mode : "nullable" + - name : "invstmntsothrend" + type : "integer" + mode : "nullable" + - name : "invstmntsprgmend" + type : "integer" + mode : "nullable" + - name : "intangibleassetsend" + type : "integer" + mode : "nullable" + - name : "othrassetsend" + type : "integer" + mode : "nullable" + - name : "totassetsend" + type : "integer" + mode : "nullable" + - name : "accntspayableend" + type : "integer" + mode : "nullable" + - name : "grntspayableend" + type : "integer" + mode : "nullable" + - name : "deferedrevnuend" + type : "integer" + mode : "nullable" + - name : "txexmptbndsend" + type : "integer" + mode : "nullable" + - 
name : "escrwaccntliabend" + type : "integer" + mode : "nullable" + - name : "paybletoffcrsend" + type : "integer" + mode : "nullable" + - name : "secrdmrtgsend" + type : "integer" + mode : "nullable" + - name : "unsecurednotesend" + type : "integer" + mode : "nullable" + - name : "othrliabend" + type : "integer" + mode : "nullable" + - name : "totliabend" + type : "integer" + mode : "nullable" + - name : "unrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "temprstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "permrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "capitalstktrstend" + type : "integer" + mode : "nullable" + - name : "paidinsurplusend" + type : "integer" + mode : "nullable" + - name : "retainedearnend" + type : "integer" + mode : "nullable" + - name : "totnetassetend" + type : "integer" + mode : "nullable" + - name : "totnetliabastend" + type : "integer" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + mode : "nullable" + - name : "totsupport" + type : "integer" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "grsinc170" + type : "integer" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + mode : "nullable" + - name : "othrinc170" + type : "integer" + mode : "nullable" + - name : "totsupp170" + type : "integer" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + mode : "nullable" + - name : "totgftgrntrcvd509" + type 
: "integer" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "grsinc509" + type : "integer" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + mode : "nullable" + - name : "othrinc509" + type : "integer" + mode : "nullable" + - name : "totsupp509" + type : "integer" + mode : "nullable" + + graph_paths: + - "irs_990_transform_csv >> load_irs_990_to_bq" \ No newline at end of file diff --git a/datasets/irs_990/irs_990_2016/irs_990_2016_dag.py b/datasets/irs_990/irs_990_2016/irs_990_2016_dag.py new file mode 100644 index 000000000..de26999dd --- /dev/null +++ b/datasets/irs_990/irs_990_2016/irs_990_2016_dag.py @@ -0,0 +1,315 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2016", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_2016_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_2016_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2016", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextract990.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2016/data_output.csv", + "PIPELINE_NAME": "irs_990_2016", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasis
ecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unrel
txincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_2016_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_2016_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2016/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2016", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "elf", "type": "string", "mode": "nullable"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", 
"mode": "nullable"}, + {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": 
"string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", 
"type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", 
"mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", 
"type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", "type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", 
"mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", "mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", 
"mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_2016_transform_csv >> load_irs_990_2016_to_bq diff --git 
a/datasets/irs_990/irs_990_2016/pipeline.yaml b/datasets/irs_990/irs_990_2016/pipeline.yaml new file mode 100644 index 000000000..28eb24828 --- /dev/null +++ b/datasets/irs_990/irs_990_2016/pipeline.yaml @@ -0,0 +1,853 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2016 + + # Description of the table + description: "irs_990_2016 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2016 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_2016_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2016" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. 
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/16eofinextract990.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2016/data_output.csv" + PIPELINE_NAME: "irs_990_2016" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8
282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend
","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_2016_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2016/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2016" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + schema_fields: + - name : "ein" + type : "string" + mode : "required" + - name : "elf" + type : "string" + mode : "nullable" + - name : "tax_pd" + type : "integer" + mode : "nullable" + - name : "subseccd" + type : "integer" + mode : "nullable" + - name : "s501c3or4947a1cd" + type : "string" + mode : "nullable" + - name : "schdbind" + type : "string" + mode : "nullable" + - name : "politicalactvtscd" + type : "string" + mode : "nullable" + - name : "lbbyingactvtscd" + type : "string" + mode : "nullable" + - name : "subjto6033cd" + type : "string" + mode : "nullable" + - name : "dnradvisedfundscd" + type : "string" + mode : "nullable" + - name : "prptyintrcvdcd" + type : "string" + mode : "nullable" + - name : "maintwrkofartcd" + type : "string" + mode : "nullable" + - name : "crcounselingqstncd" + type : "string" + mode : "nullable" + - name : "hldassetsintermpermcd" + type : "string" + mode : "nullable" + - name : "rptlndbldgeqptcd" + type : "string" + mode : "nullable" + - name : "rptinvstothsecd" + type : "string" + mode : "nullable" + - name : "rptinvstprgrelcd" + type : "string" + mode : "nullable" + - name : "rptothasstcd" + type : "string" + mode : "nullable" + - name : "rptothliabcd" + type : 
"string" + mode : "nullable" + - name : "sepcnsldtfinstmtcd" + type : "string" + mode : "nullable" + - name : "sepindaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "inclinfinstmtcd" + type : "string" + mode : "nullable" + - name : "operateschools170cd" + type : "string" + mode : "nullable" + - name : "frgnofficecd" + type : "string" + mode : "nullable" + - name : "frgnrevexpnscd" + type : "string" + mode : "nullable" + - name : "frgngrntscd" + type : "string" + mode : "nullable" + - name : "frgnaggragrntscd" + type : "string" + mode : "nullable" + - name : "rptprofndrsngfeescd" + type : "string" + mode : "nullable" + - name : "rptincfnndrsngcd" + type : "string" + mode : "nullable" + - name : "rptincgamingcd" + type : "string" + mode : "nullable" + - name : "operatehosptlcd" + type : "string" + mode : "nullable" + - name : "hospaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstogovtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstoindvcd" + type : "string" + mode : "nullable" + - name : "rptyestocompnstncd" + type : "string" + mode : "nullable" + - name : "txexmptbndcd" + type : "string" + mode : "nullable" + - name : "invstproceedscd" + type : "string" + mode : "nullable" + - name : "maintescrwaccntcd" + type : "string" + mode : "nullable" + - name : "actonbehalfcd" + type : "string" + mode : "nullable" + - name : "engageexcessbnftcd" + type : "string" + mode : "nullable" + - name : "awarexcessbnftcd" + type : "string" + mode : "nullable" + - name : "loantofficercd" + type : "string" + mode : "nullable" + - name : "grantoofficercd" + type : "string" + mode : "nullable" + - name : "dirbusnreltdcd" + type : "string" + mode : "nullable" + - name : "fmlybusnreltdcd" + type : "string" + mode : "nullable" + - name : "servasofficercd" + type : "string" + mode : "nullable" + - name : "recvnoncashcd" + type : "string" + mode : "nullable" + - name : "recvartcd" + type : "string" + mode : "nullable" + - name : 
"ceaseoperationscd" + type : "string" + mode : "nullable" + - name : "sellorexchcd" + type : "string" + mode : "nullable" + - name : "ownsepentcd" + type : "string" + mode : "nullable" + - name : "reltdorgcd" + type : "string" + mode : "nullable" + - name : "intincntrlcd" + type : "string" + mode : "nullable" + - name : "orgtrnsfrcd" + type : "string" + mode : "nullable" + - name : "conduct5percentcd" + type : "string" + mode : "nullable" + - name : "compltschocd" + type : "string" + mode : "nullable" + - name : "f1096cnt" + type : "integer" + mode : "nullable" + - name : "fw2gcnt" + type : "integer" + mode : "nullable" + - name : "wthldngrulescd" + type : "string" + mode : "nullable" + - name : "noemplyeesw3cnt" + type : "integer" + mode : "nullable" + - name : "filerqrdrtnscd" + type : "string" + mode : "nullable" + - name : "unrelbusinccd" + type : "string" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + mode : "nullable" + - name : "frgnacctcd" + type : "string" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + mode : "nullable" + - name : "prtynotifyorgcd" + type : "string" + mode : "nullable" + - name : "filedf8886tcd" + type : "string" + mode : "nullable" + - name : "solicitcntrbcd" + type : "string" + mode : "nullable" + - name : "exprstmntcd" + type : "string" + mode : "nullable" + - name : "providegoodscd" + type : "string" + mode : "nullable" + - name : "notfydnrvalcd" + type : "string" + mode : "nullable" + - name : "filedf8282cd" + type : "string" + mode : "nullable" + - name : "f8282cnt" + type : "integer" + mode : "nullable" + - name : "fndsrcvdcd" + type : "string" + mode : "nullable" + - name : "premiumspaidcd" + type : "string" + mode : "nullable" + - name : "filedf8899cd" + type : "string" + mode : "nullable" + - name : "filedf1098ccd" + type : "string" + mode : "nullable" + - name : "excbushldngscd" + type : "string" + mode : "nullable" + - name : "s4966distribcd" + type : "string" + mode : "nullable" + - 
name : "distribtodonorcd" + type : "string" + mode : "nullable" + - name : "initiationfees" + type : "integer" + mode : "nullable" + - name : "grsrcptspublicuse" + type : "integer" + mode : "nullable" + - name : "grsincmembers" + type : "integer" + mode : "nullable" + - name : "grsincother" + type : "integer" + mode : "nullable" + - name : "filedlieuf1041cd" + type : "string" + mode : "nullable" + - name : "txexmptint" + type : "integer" + mode : "nullable" + - name : "qualhlthplncd" + type : "string" + mode : "nullable" + - name : "qualhlthreqmntn" + type : "integer" + mode : "nullable" + - name : "qualhlthonhnd" + type : "integer" + mode : "nullable" + - name : "rcvdpdtngcd" + type : "string" + mode : "nullable" + - name : "filedf720cd" + type : "string" + mode : "nullable" + - name : "totreprtabled" + type : "integer" + mode : "nullable" + - name : "totcomprelatede" + type : "integer" + mode : "nullable" + - name : "totestcompf" + type : "integer" + mode : "nullable" + - name : "noindiv100kcnt" + type : "integer" + mode : "nullable" + - name : "nocontractor100kcnt" + type : "integer" + mode : "nullable" + - name : "totcntrbgfts" + type : "integer" + mode : "nullable" + - name : "prgmservcode2acd" + type : "integer" + mode : "nullable" + - name : "totrev2acola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2bcd" + type : "integer" + mode : "nullable" + - name : "totrev2bcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ccd" + type : "integer" + mode : "nullable" + - name : "totrev2ccola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2dcd" + type : "integer" + mode : "nullable" + - name : "totrev2dcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ecd" + type : "integer" + mode : "nullable" + - name : "totrev2ecola" + type : "integer" + mode : "nullable" + - name : "totrev2fcola" + type : "integer" + mode : "nullable" + - name : "totprgmrevnue" + type : "integer" + mode : "nullable" + - name 
: "invstmntinc" + type : "integer" + mode : "nullable" + - name : "txexmptbndsproceeds" + type : "integer" + mode : "nullable" + - name : "royaltsinc" + type : "integer" + mode : "nullable" + - name : "grsrntsreal" + type : "integer" + mode : "nullable" + - name : "grsrntsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlexpnsreal" + type : "integer" + mode : "nullable" + - name : "rntlexpnsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlincreal" + type : "integer" + mode : "nullable" + - name : "rntlincprsnl" + type : "integer" + mode : "nullable" + - name : "netrntlinc" + type : "integer" + mode : "nullable" + - name : "grsalesecur" + type : "integer" + mode : "nullable" + - name : "grsalesothr" + type : "integer" + mode : "nullable" + - name : "cstbasisecur" + type : "integer" + mode : "nullable" + - name : "cstbasisothr" + type : "integer" + mode : "nullable" + - name : "gnlsecur" + type : "integer" + mode : "nullable" + - name : "gnlsothr" + type : "integer" + mode : "nullable" + - name : "netgnls" + type : "integer" + mode : "nullable" + - name : "grsincfndrsng" + type : "integer" + mode : "nullable" + - name : "lessdirfndrsng" + type : "integer" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + mode : "nullable" + - name : "lessdirgaming" + type : "integer" + mode : "nullable" + - name : "netincgaming" + type : "integer" + mode : "nullable" + - name : "grsalesinvent" + type : "integer" + mode : "nullable" + - name : "lesscstofgoods" + type : "integer" + mode : "nullable" + - name : "netincsales" + type : "integer" + mode : "nullable" + - name : "miscrev11acd" + type : "integer" + mode : "nullable" + - name : "miscrevtota" + type : "integer" + mode : "nullable" + - name : "miscrev11bcd" + type : "integer" + mode : "nullable" + - name : "miscrevtot11b" + type : "integer" + mode : "nullable" + - name : "miscrev11ccd" + type : "integer" + mode : 
"nullable" + - name : "miscrevtot11c" + type : "integer" + mode : "nullable" + - name : "miscrevtot11d" + type : "integer" + mode : "nullable" + - name : "miscrevtot11e" + type : "integer" + mode : "nullable" + - name : "totrevenue" + type : "integer" + mode : "nullable" + - name : "grntstogovt" + type : "integer" + mode : "nullable" + - name : "grnsttoindiv" + type : "integer" + mode : "nullable" + - name : "grntstofrgngovt" + type : "integer" + mode : "nullable" + - name : "benifitsmembrs" + type : "integer" + mode : "nullable" + - name : "compnsatncurrofcr" + type : "integer" + mode : "nullable" + - name : "compnsatnandothr" + type : "integer" + mode : "nullable" + - name : "othrsalwages" + type : "integer" + mode : "nullable" + - name : "pensionplancontrb" + type : "integer" + mode : "nullable" + - name : "othremplyeebenef" + type : "integer" + mode : "nullable" + - name : "payrolltx" + type : "integer" + mode : "nullable" + - name : "feesforsrvcmgmt" + type : "integer" + mode : "nullable" + - name : "legalfees" + type : "integer" + mode : "nullable" + - name : "accntingfees" + type : "integer" + mode : "nullable" + - name : "feesforsrvclobby" + type : "integer" + mode : "nullable" + - name : "profndraising" + type : "integer" + mode : "nullable" + - name : "feesforsrvcinvstmgmt" + type : "integer" + mode : "nullable" + - name : "feesforsrvcothr" + type : "integer" + mode : "nullable" + - name : "advrtpromo" + type : "integer" + mode : "nullable" + - name : "officexpns" + type : "integer" + mode : "nullable" + - name : "infotech" + type : "integer" + mode : "nullable" + - name : "royaltsexpns" + type : "integer" + mode : "nullable" + - name : "occupancy" + type : "integer" + mode : "nullable" + - name : "travel" + type : "integer" + mode : "nullable" + - name : "travelofpublicoffcl" + type : "integer" + mode : "nullable" + - name : "converconventmtng" + type : "integer" + mode : "nullable" + - name : "interestamt" + type : "integer" + mode : "nullable" + - name 
: "pymtoaffiliates" + type : "integer" + mode : "nullable" + - name : "deprcatndepletn" + type : "integer" + mode : "nullable" + - name : "insurance" + type : "integer" + mode : "nullable" + - name : "othrexpnsa" + type : "integer" + mode : "nullable" + - name : "othrexpnsb" + type : "integer" + mode : "nullable" + - name : "othrexpnsc" + type : "integer" + mode : "nullable" + - name : "othrexpnsd" + type : "integer" + mode : "nullable" + - name : "othrexpnse" + type : "integer" + mode : "nullable" + - name : "othrexpnsf" + type : "integer" + mode : "nullable" + - name : "totfuncexpns" + type : "integer" + mode : "nullable" + - name : "nonintcashend" + type : "integer" + mode : "nullable" + - name : "svngstempinvend" + type : "integer" + mode : "nullable" + - name : "pldgegrntrcvblend" + type : "integer" + mode : "nullable" + - name : "accntsrcvblend" + type : "integer" + mode : "nullable" + - name : "currfrmrcvblend" + type : "integer" + mode : "nullable" + - name : "rcvbldisqualend" + type : "integer" + mode : "nullable" + - name : "notesloansrcvblend" + type : "integer" + mode : "nullable" + - name : "invntriesalesend" + type : "integer" + mode : "nullable" + - name : "prepaidexpnsend" + type : "integer" + mode : "nullable" + - name : "lndbldgsequipend" + type : "integer" + mode : "nullable" + - name : "invstmntsend" + type : "integer" + mode : "nullable" + - name : "invstmntsothrend" + type : "integer" + mode : "nullable" + - name : "invstmntsprgmend" + type : "integer" + mode : "nullable" + - name : "intangibleassetsend" + type : "integer" + mode : "nullable" + - name : "othrassetsend" + type : "integer" + mode : "nullable" + - name : "totassetsend" + type : "integer" + mode : "nullable" + - name : "accntspayableend" + type : "integer" + mode : "nullable" + - name : "grntspayableend" + type : "integer" + mode : "nullable" + - name : "deferedrevnuend" + type : "integer" + mode : "nullable" + - name : "txexmptbndsend" + type : "integer" + mode : "nullable" + - 
name : "escrwaccntliabend" + type : "integer" + mode : "nullable" + - name : "paybletoffcrsend" + type : "integer" + mode : "nullable" + - name : "secrdmrtgsend" + type : "integer" + mode : "nullable" + - name : "unsecurednotesend" + type : "integer" + mode : "nullable" + - name : "othrliabend" + type : "integer" + mode : "nullable" + - name : "totliabend" + type : "integer" + mode : "nullable" + - name : "unrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "temprstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "permrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "capitalstktrstend" + type : "integer" + mode : "nullable" + - name : "paidinsurplusend" + type : "integer" + mode : "nullable" + - name : "retainedearnend" + type : "integer" + mode : "nullable" + - name : "totnetassetend" + type : "integer" + mode : "nullable" + - name : "totnetliabastend" + type : "integer" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + mode : "nullable" + - name : "totsupport" + type : "integer" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "grsinc170" + type : "integer" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + mode : "nullable" + - name : "othrinc170" + type : "integer" + mode : "nullable" + - name : "totsupp170" + type : "integer" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + mode : "nullable" + - name : "totgftgrntrcvd509" + type 
: "integer" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "grsinc509" + type : "integer" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + mode : "nullable" + - name : "othrinc509" + type : "integer" + mode : "nullable" + - name : "totsupp509" + type : "integer" + mode : "nullable" + + graph_paths: + - "irs_990_2016_transform_csv >> load_irs_990_2016_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_2017/irs_990_2017_dag.py b/datasets/irs_990/irs_990_2017/irs_990_2017_dag.py new file mode 100644 index 000000000..22f04f976 --- /dev/null +++ b/datasets/irs_990/irs_990_2017/irs_990_2017_dag.py @@ -0,0 +1,315 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_2017", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_2017_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_2017_transform_csv", + startup_timeout_seconds=600, + name="irs_990_2017", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/17eofinextract990.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_2017/data_output.csv", + "PIPELINE_NAME": "irs_990_2017", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasis
ecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unrel
txincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": "operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": 
"orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": "rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": 
"grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": "othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": 
"othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": "temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": 
"grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_2017_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_2017_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_2017/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_2017", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + {"name": "ein", "type": "string", "mode": "required"}, + {"name": "elf", "type": "string", "mode": "nullable"}, + {"name": "tax_pd", "type": "integer", "mode": "nullable"}, + {"name": "subseccd", "type": "integer", "mode": "nullable"}, + {"name": "s501c3or4947a1cd", "type": "string", "mode": "nullable"}, + {"name": "schdbind", "type": "string", "mode": "nullable"}, + {"name": "politicalactvtscd", "type": "string", "mode": "nullable"}, + {"name": "lbbyingactvtscd", "type": "string", "mode": "nullable"}, + {"name": "subjto6033cd", "type": "string", "mode": "nullable"}, + {"name": "dnradvisedfundscd", "type": "string", "mode": "nullable"}, + {"name": "prptyintrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "maintwrkofartcd", "type": "string", "mode": "nullable"}, + {"name": "crcounselingqstncd", "type": "string", "mode": "nullable"}, + {"name": "hldassetsintermpermcd", "type": "string", 
"mode": "nullable"}, + {"name": "rptlndbldgeqptcd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstothsecd", "type": "string", "mode": "nullable"}, + {"name": "rptinvstprgrelcd", "type": "string", "mode": "nullable"}, + {"name": "rptothasstcd", "type": "string", "mode": "nullable"}, + {"name": "rptothliabcd", "type": "string", "mode": "nullable"}, + {"name": "sepcnsldtfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "sepindaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "inclinfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "operateschools170cd", "type": "string", "mode": "nullable"}, + {"name": "frgnofficecd", "type": "string", "mode": "nullable"}, + {"name": "frgnrevexpnscd", "type": "string", "mode": "nullable"}, + {"name": "frgngrntscd", "type": "string", "mode": "nullable"}, + {"name": "frgnaggragrntscd", "type": "string", "mode": "nullable"}, + {"name": "rptprofndrsngfeescd", "type": "string", "mode": "nullable"}, + {"name": "rptincfnndrsngcd", "type": "string", "mode": "nullable"}, + {"name": "rptincgamingcd", "type": "string", "mode": "nullable"}, + {"name": "operatehosptlcd", "type": "string", "mode": "nullable"}, + {"name": "hospaudfinstmtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstogovtcd", "type": "string", "mode": "nullable"}, + {"name": "rptgrntstoindvcd", "type": "string", "mode": "nullable"}, + {"name": "rptyestocompnstncd", "type": "string", "mode": "nullable"}, + {"name": "txexmptbndcd", "type": "string", "mode": "nullable"}, + {"name": "invstproceedscd", "type": "string", "mode": "nullable"}, + {"name": "maintescrwaccntcd", "type": "string", "mode": "nullable"}, + {"name": "actonbehalfcd", "type": "string", "mode": "nullable"}, + {"name": "engageexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "awarexcessbnftcd", "type": "string", "mode": "nullable"}, + {"name": "loantofficercd", "type": "string", "mode": "nullable"}, + {"name": "grantoofficercd", "type": 
"string", "mode": "nullable"}, + {"name": "dirbusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "fmlybusnreltdcd", "type": "string", "mode": "nullable"}, + {"name": "servasofficercd", "type": "string", "mode": "nullable"}, + {"name": "recvnoncashcd", "type": "string", "mode": "nullable"}, + {"name": "recvartcd", "type": "string", "mode": "nullable"}, + {"name": "ceaseoperationscd", "type": "string", "mode": "nullable"}, + {"name": "sellorexchcd", "type": "string", "mode": "nullable"}, + {"name": "ownsepentcd", "type": "string", "mode": "nullable"}, + {"name": "reltdorgcd", "type": "string", "mode": "nullable"}, + {"name": "intincntrlcd", "type": "string", "mode": "nullable"}, + {"name": "orgtrnsfrcd", "type": "string", "mode": "nullable"}, + {"name": "conduct5percentcd", "type": "string", "mode": "nullable"}, + {"name": "compltschocd", "type": "string", "mode": "nullable"}, + {"name": "f1096cnt", "type": "integer", "mode": "nullable"}, + {"name": "fw2gcnt", "type": "integer", "mode": "nullable"}, + {"name": "wthldngrulescd", "type": "string", "mode": "nullable"}, + {"name": "noemplyeesw3cnt", "type": "integer", "mode": "nullable"}, + {"name": "filerqrdrtnscd", "type": "string", "mode": "nullable"}, + {"name": "unrelbusinccd", "type": "string", "mode": "nullable"}, + {"name": "filedf990tcd", "type": "string", "mode": "nullable"}, + {"name": "frgnacctcd", "type": "string", "mode": "nullable"}, + {"name": "prohibtdtxshltrcd", "type": "string", "mode": "nullable"}, + {"name": "prtynotifyorgcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8886tcd", "type": "string", "mode": "nullable"}, + {"name": "solicitcntrbcd", "type": "string", "mode": "nullable"}, + {"name": "exprstmntcd", "type": "string", "mode": "nullable"}, + {"name": "providegoodscd", "type": "string", "mode": "nullable"}, + {"name": "notfydnrvalcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8282cd", "type": "string", "mode": "nullable"}, + {"name": "f8282cnt", 
"type": "integer", "mode": "nullable"}, + {"name": "fndsrcvdcd", "type": "string", "mode": "nullable"}, + {"name": "premiumspaidcd", "type": "string", "mode": "nullable"}, + {"name": "filedf8899cd", "type": "string", "mode": "nullable"}, + {"name": "filedf1098ccd", "type": "string", "mode": "nullable"}, + {"name": "excbushldngscd", "type": "string", "mode": "nullable"}, + {"name": "s4966distribcd", "type": "string", "mode": "nullable"}, + {"name": "distribtodonorcd", "type": "string", "mode": "nullable"}, + {"name": "initiationfees", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptspublicuse", "type": "integer", "mode": "nullable"}, + {"name": "grsincmembers", "type": "integer", "mode": "nullable"}, + {"name": "grsincother", "type": "integer", "mode": "nullable"}, + {"name": "filedlieuf1041cd", "type": "string", "mode": "nullable"}, + {"name": "txexmptint", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthplncd", "type": "string", "mode": "nullable"}, + {"name": "qualhlthreqmntn", "type": "integer", "mode": "nullable"}, + {"name": "qualhlthonhnd", "type": "integer", "mode": "nullable"}, + {"name": "rcvdpdtngcd", "type": "string", "mode": "nullable"}, + {"name": "filedf720cd", "type": "string", "mode": "nullable"}, + {"name": "totreprtabled", "type": "integer", "mode": "nullable"}, + {"name": "totcomprelatede", "type": "integer", "mode": "nullable"}, + {"name": "totestcompf", "type": "integer", "mode": "nullable"}, + {"name": "noindiv100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "nocontractor100kcnt", "type": "integer", "mode": "nullable"}, + {"name": "totcntrbgfts", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2acd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2acola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2bcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2bcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ccd", "type": "integer", 
"mode": "nullable"}, + {"name": "totrev2ccola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2dcd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2dcola", "type": "integer", "mode": "nullable"}, + {"name": "prgmservcode2ecd", "type": "integer", "mode": "nullable"}, + {"name": "totrev2ecola", "type": "integer", "mode": "nullable"}, + {"name": "totrev2fcola", "type": "integer", "mode": "nullable"}, + {"name": "totprgmrevnue", "type": "integer", "mode": "nullable"}, + {"name": "invstmntinc", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsproceeds", "type": "integer", "mode": "nullable"}, + {"name": "royaltsinc", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsreal", "type": "integer", "mode": "nullable"}, + {"name": "grsrntsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlexpnsprsnl", "type": "integer", "mode": "nullable"}, + {"name": "rntlincreal", "type": "integer", "mode": "nullable"}, + {"name": "rntlincprsnl", "type": "integer", "mode": "nullable"}, + {"name": "netrntlinc", "type": "integer", "mode": "nullable"}, + {"name": "grsalesecur", "type": "integer", "mode": "nullable"}, + {"name": "grsalesothr", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisecur", "type": "integer", "mode": "nullable"}, + {"name": "cstbasisothr", "type": "integer", "mode": "nullable"}, + {"name": "gnlsecur", "type": "integer", "mode": "nullable"}, + {"name": "gnlsothr", "type": "integer", "mode": "nullable"}, + {"name": "netgnls", "type": "integer", "mode": "nullable"}, + {"name": "grsincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "lessdirfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "netincfndrsng", "type": "integer", "mode": "nullable"}, + {"name": "grsincgaming", "type": "integer", "mode": "nullable"}, + {"name": "lessdirgaming", "type": "integer", "mode": "nullable"}, + {"name": "netincgaming", 
"type": "integer", "mode": "nullable"}, + {"name": "grsalesinvent", "type": "integer", "mode": "nullable"}, + {"name": "lesscstofgoods", "type": "integer", "mode": "nullable"}, + {"name": "netincsales", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11acd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtota", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11bcd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11b", "type": "integer", "mode": "nullable"}, + {"name": "miscrev11ccd", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11c", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11d", "type": "integer", "mode": "nullable"}, + {"name": "miscrevtot11e", "type": "integer", "mode": "nullable"}, + {"name": "totrevenue", "type": "integer", "mode": "nullable"}, + {"name": "grntstogovt", "type": "integer", "mode": "nullable"}, + {"name": "grnsttoindiv", "type": "integer", "mode": "nullable"}, + {"name": "grntstofrgngovt", "type": "integer", "mode": "nullable"}, + {"name": "benifitsmembrs", "type": "integer", "mode": "nullable"}, + {"name": "compnsatncurrofcr", "type": "integer", "mode": "nullable"}, + {"name": "compnsatnandothr", "type": "integer", "mode": "nullable"}, + {"name": "othrsalwages", "type": "integer", "mode": "nullable"}, + {"name": "pensionplancontrb", "type": "integer", "mode": "nullable"}, + {"name": "othremplyeebenef", "type": "integer", "mode": "nullable"}, + {"name": "payrolltx", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcmgmt", "type": "integer", "mode": "nullable"}, + {"name": "legalfees", "type": "integer", "mode": "nullable"}, + {"name": "accntingfees", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvclobby", "type": "integer", "mode": "nullable"}, + {"name": "profndraising", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcinvstmgmt", "type": "integer", "mode": "nullable"}, + {"name": "feesforsrvcothr", "type": "integer", 
"mode": "nullable"}, + {"name": "advrtpromo", "type": "integer", "mode": "nullable"}, + {"name": "officexpns", "type": "integer", "mode": "nullable"}, + {"name": "infotech", "type": "integer", "mode": "nullable"}, + {"name": "royaltsexpns", "type": "integer", "mode": "nullable"}, + {"name": "occupancy", "type": "integer", "mode": "nullable"}, + {"name": "travel", "type": "integer", "mode": "nullable"}, + {"name": "travelofpublicoffcl", "type": "integer", "mode": "nullable"}, + {"name": "converconventmtng", "type": "integer", "mode": "nullable"}, + {"name": "interestamt", "type": "integer", "mode": "nullable"}, + {"name": "pymtoaffiliates", "type": "integer", "mode": "nullable"}, + {"name": "deprcatndepletn", "type": "integer", "mode": "nullable"}, + {"name": "insurance", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsa", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsb", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsc", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsd", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnse", "type": "integer", "mode": "nullable"}, + {"name": "othrexpnsf", "type": "integer", "mode": "nullable"}, + {"name": "totfuncexpns", "type": "integer", "mode": "nullable"}, + {"name": "nonintcashend", "type": "integer", "mode": "nullable"}, + {"name": "svngstempinvend", "type": "integer", "mode": "nullable"}, + {"name": "pldgegrntrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "accntsrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "currfrmrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "rcvbldisqualend", "type": "integer", "mode": "nullable"}, + {"name": "notesloansrcvblend", "type": "integer", "mode": "nullable"}, + {"name": "invntriesalesend", "type": "integer", "mode": "nullable"}, + {"name": "prepaidexpnsend", "type": "integer", "mode": "nullable"}, + {"name": "lndbldgsequipend", "type": "integer", "mode": "nullable"}, + {"name": 
"invstmntsend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsothrend", "type": "integer", "mode": "nullable"}, + {"name": "invstmntsprgmend", "type": "integer", "mode": "nullable"}, + {"name": "intangibleassetsend", "type": "integer", "mode": "nullable"}, + {"name": "othrassetsend", "type": "integer", "mode": "nullable"}, + {"name": "totassetsend", "type": "integer", "mode": "nullable"}, + {"name": "accntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "grntspayableend", "type": "integer", "mode": "nullable"}, + {"name": "deferedrevnuend", "type": "integer", "mode": "nullable"}, + {"name": "txexmptbndsend", "type": "integer", "mode": "nullable"}, + {"name": "escrwaccntliabend", "type": "integer", "mode": "nullable"}, + {"name": "paybletoffcrsend", "type": "integer", "mode": "nullable"}, + {"name": "secrdmrtgsend", "type": "integer", "mode": "nullable"}, + {"name": "unsecurednotesend", "type": "integer", "mode": "nullable"}, + {"name": "othrliabend", "type": "integer", "mode": "nullable"}, + {"name": "totliabend", "type": "integer", "mode": "nullable"}, + {"name": "unrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "temprstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "permrstrctnetasstsend", "type": "integer", "mode": "nullable"}, + {"name": "capitalstktrstend", "type": "integer", "mode": "nullable"}, + {"name": "paidinsurplusend", "type": "integer", "mode": "nullable"}, + {"name": "retainedearnend", "type": "integer", "mode": "nullable"}, + {"name": "totnetassetend", "type": "integer", "mode": "nullable"}, + {"name": "totnetliabastend", "type": "integer", "mode": "nullable"}, + {"name": "nonpfrea", "type": "integer", "mode": "nullable"}, + {"name": "totnooforgscnt", "type": "integer", "mode": "nullable"}, + {"name": "totsupport", "type": "integer", "mode": "nullable"}, + {"name": "gftgrntsrcvd170", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied170", "type": "integer", 
"mode": "nullable"}, + {"name": "srvcsval170", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "exceeds2pct170", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesspct170", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot170", "type": "integer", "mode": "nullable"}, + {"name": "grsinc170", "type": "integer", "mode": "nullable"}, + {"name": "netincunreltd170", "type": "integer", "mode": "nullable"}, + {"name": "othrinc170", "type": "integer", "mode": "nullable"}, + {"name": "totsupp170", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsrelated170", "type": "integer", "mode": "nullable"}, + {"name": "totgftgrntrcvd509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsadmissn509", "type": "integer", "mode": "nullable"}, + {"name": "grsrcptsactivities509", "type": "integer", "mode": "nullable"}, + {"name": "txrevnuelevied509", "type": "integer", "mode": "nullable"}, + {"name": "srvcsval509", "type": "integer", "mode": "nullable"}, + {"name": "pubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "rcvdfrmdisqualsub509", "type": "integer", "mode": "nullable"}, + {"name": "exceeds1pct509", "type": "integer", "mode": "nullable"}, + {"name": "subtotpub509", "type": "integer", "mode": "nullable"}, + {"name": "pubsupplesub509", "type": "integer", "mode": "nullable"}, + {"name": "samepubsuppsubtot509", "type": "integer", "mode": "nullable"}, + {"name": "grsinc509", "type": "integer", "mode": "nullable"}, + {"name": "unreltxincls511tx509", "type": "integer", "mode": "nullable"}, + {"name": "subtotsuppinc509", "type": "integer", "mode": "nullable"}, + {"name": "netincunrelatd509", "type": "integer", "mode": "nullable"}, + {"name": "othrinc509", "type": "integer", "mode": "nullable"}, + {"name": "totsupp509", "type": "integer", "mode": "nullable"}, + ], + ) + + irs_990_2017_transform_csv >> load_irs_990_2017_to_bq diff --git 
a/datasets/irs_990/irs_990_2017/pipeline.yaml b/datasets/irs_990/irs_990_2017/pipeline.yaml new file mode 100644 index 000000000..1e365c9e7 --- /dev/null +++ b/datasets/irs_990/irs_990_2017/pipeline.yaml @@ -0,0 +1,854 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_2017 + + # Description of the table + description: "irs_990_2017 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_2017 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_2017_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_2017" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. 
+ namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/17eofinextract990.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_2017/data_output.csv" + PIPELINE_NAME: "irs_990_2017" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","s501c3or4947a1cd","schdbind","politicalactvtscd","lbbyingactvtscd","subjto6033cd","dnradvisedfundscd","prptyintrcvdcd","maintwrkofartcd","crcounselingqstncd","hldassetsintermpermcd","rptlndbldgeqptcd","rptinvstothsecd","rptinvstprgrelcd","rptothasstcd","rptothliabcd","sepcnsldtfinstmtcd","sepindaudfinstmtcd","inclinfinstmtcd","operateschools170cd","frgnofficecd","frgnrevexpnscd","frgngrntscd","frgnaggragrntscd","rptprofndrsngfeescd","rptincfnndrsngcd","rptincgamingcd","operatehosptlcd","hospaudfinstmtcd","rptgrntstogovtcd","rptgrntstoindvcd","rptyestocompnstncd","txexmptbndcd","invstproceedscd","maintescrwaccntcd","actonbehalfcd","engageexcessbnftcd","awarexcessbnftcd","loantofficercd","grantoofficercd","dirbusnreltdcd","fmlybusnreltdcd","servasofficercd","recvnoncashcd","recvartcd","ceaseoperationscd","sellorexchcd","ownsepentcd","reltdorgcd","intincntrlcd","orgtrnsfrcd","conduct5percentcd","compltschocd","f1096cnt","fw2gcnt","wthldngrulescd","noemplyeesw3cnt","filerqrdrtnscd","unrelbusinccd","filedf990tcd","frgnacctcd","prohibtdtxshltrcd","prtynotifyorgcd","filedf8886tcd","solicitcntrbcd","exprstmntcd","providegoodscd","notfydnrvalcd","filedf8
282cd","f8282cnt","fndsrcvdcd","premiumspaidcd","filedf8899cd","filedf1098ccd","excbushldngscd","s4966distribcd","distribtodonorcd","initiationfees","grsrcptspublicuse","grsincmembers","grsincother","filedlieuf1041cd","txexmptint","qualhlthplncd","qualhlthreqmntn","qualhlthonhnd","rcvdpdtngcd","filedf720cd","totreprtabled","totcomprelatede","totestcompf","noindiv100kcnt","nocontractor100kcnt","totcntrbgfts","prgmservcode2acd","totrev2acola","prgmservcode2bcd","totrev2bcola","prgmservcode2ccd","totrev2ccola","prgmservcode2dcd","totrev2dcola","prgmservcode2ecd","totrev2ecola","totrev2fcola","totprgmrevnue","invstmntinc","txexmptbndsproceeds","royaltsinc","grsrntsreal","grsrntsprsnl","rntlexpnsreal","rntlexpnsprsnl","rntlincreal","rntlincprsnl","netrntlinc","grsalesecur","grsalesothr","cstbasisecur","cstbasisothr","gnlsecur","gnlsothr","netgnls","grsincfndrsng","lessdirfndrsng","netincfndrsng","grsincgaming","lessdirgaming","netincgaming","grsalesinvent","lesscstofgoods","netincsales","miscrev11acd","miscrevtota","miscrev11bcd","miscrevtot11b","miscrev11ccd","miscrevtot11c","miscrevtot11d","miscrevtot11e","totrevenue","grntstogovt","grnsttoindiv","grntstofrgngovt","benifitsmembrs","compnsatncurrofcr","compnsatnandothr","othrsalwages","pensionplancontrb","othremplyeebenef","payrolltx","feesforsrvcmgmt","legalfees","accntingfees","feesforsrvclobby","profndraising","feesforsrvcinvstmgmt","feesforsrvcothr","advrtpromo","officexpns","infotech","royaltsexpns","occupancy","travel","travelofpublicoffcl","converconventmtng","interestamt","pymtoaffiliates","deprcatndepletn","insurance","othrexpnsa","othrexpnsb","othrexpnsc","othrexpnsd","othrexpnse","othrexpnsf","totfuncexpns","nonintcashend","svngstempinvend","pldgegrntrcvblend","accntsrcvblend","currfrmrcvblend","rcvbldisqualend","notesloansrcvblend","invntriesalesend","prepaidexpnsend","lndbldgsequipend","invstmntsend","invstmntsothrend","invstmntsprgmend","intangibleassetsend","othrassetsend","totassetsend","accntspayableend
","grntspayableend","deferedrevnuend","txexmptbndsend","escrwaccntliabend","paybletoffcrsend","secrdmrtgsend","unsecurednotesend","othrliabend","totliabend","unrstrctnetasstsend","temprstrctnetasstsend","permrstrctnetasstsend","capitalstktrstend","paidinsurplusend","retainedearnend","totnetassetend","totnetliabastend","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"elf": "elf","EIN": "ein","tax_prd": "tax_pd","subseccd": "subseccd","s50Yc3or4947aYcd": "s501c3or4947a1cd","schdbind": "schdbind","politicalactvtscd": "politicalactvtscd","lbbyingactvtscd": "lbbyingactvtscd","subjto6033cd": "subjto6033cd","dnradvisedfundscd": "dnradvisedfundscd","prptyintrcvdcd": "prptyintrcvdcd","maintwrkofartcd": "maintwrkofartcd","crcounselingqstncd": "crcounselingqstncd","hldassetsintermpermcd": "hldassetsintermpermcd","rptlndbldgeqptcd": "rptlndbldgeqptcd","rptinvstothsecd": "rptinvstothsecd","rptinvstprgrelcd": "rptinvstprgrelcd","rptothasstcd": "rptothasstcd","rptothliabcd": "rptothliabcd","sepcnsldtfinstmtcd": "sepcnsldtfinstmtcd","sepindaudfinstmtcd": "sepindaudfinstmtcd","inclinfinstmtcd": "inclinfinstmtcd","operateschoolsY70cd": "operateschools170cd","frgnofficecd": "frgnofficecd","frgnrevexpnscd": "frgnrevexpnscd","frgngrntscd": "frgngrntscd","frgnaggragrntscd": "frgnaggragrntscd","rptprofndrsngfeescd": "rptprofndrsngfeescd","rptincfnndrsngcd": "rptincfnndrsngcd","rptincgamingcd": "rptincgamingcd","operatehosptlcd": 
"operatehosptlcd","hospaudfinstmtcd": "hospaudfinstmtcd","rptgrntstogovtcd": "rptgrntstogovtcd","rptgrntstoindvcd": "rptgrntstoindvcd","rptyestocompnstncd": "rptyestocompnstncd","txexmptbndcd": "txexmptbndcd","invstproceedscd": "invstproceedscd","maintescrwaccntcd": "maintescrwaccntcd","actonbehalfcd": "actonbehalfcd","engageexcessbnftcd": "engageexcessbnftcd","awarexcessbnftcd": "awarexcessbnftcd","loantofficercd": "loantofficercd","grantoofficercd": "grantoofficercd","dirbusnreltdcd": "dirbusnreltdcd","fmlybusnreltdcd": "fmlybusnreltdcd","servasofficercd": "servasofficercd","recvnoncashcd": "recvnoncashcd","recvartcd": "recvartcd","ceaseoperationscd": "ceaseoperationscd","sellorexchcd": "sellorexchcd","ownsepentcd": "ownsepentcd","reltdorgcd": "reltdorgcd","intincntrlcd": "intincntrlcd","orgtrnsfrcd": "orgtrnsfrcd","conduct5percentcd": "conduct5percentcd","compltschocd": "compltschocd","f1096cnt": "f1096cnt","fw2gcnt": "fw2gcnt","wthldngrulescd": "wthldngrulescd","noemplyeesw3cnt": "noemplyeesw3cnt","filerqrdrtnscd": "filerqrdrtnscd","unrelbusinccd": "unrelbusinccd","filedf990tcd": "filedf990tcd","frgnacctcd": "frgnacctcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","prtynotifyorgcd": "prtynotifyorgcd","filedf8886tcd": "filedf8886tcd","solicitcntrbcd": "solicitcntrbcd","exprstmntcd": "exprstmntcd","providegoodscd": "providegoodscd","notfydnrvalcd": "notfydnrvalcd","filedf8N8Ncd": "filedf8282cd","f8282cnt": "f8282cnt","fndsrcvdcd": "fndsrcvdcd","premiumspaidcd": "premiumspaidcd","filedf8899cd": "filedf8899cd","filedfY098ccd": "filedf1098ccd","excbushldngscd": "excbushldngscd","s4966distribcd": "s4966distribcd","distribtodonorcd": "distribtodonorcd","initiationfees": "initiationfees","grsrcptspublicuse": "grsrcptspublicuse","grsincmembers": "grsincmembers","grsincother": "grsincother","filedlieufY04Ycd": "filedlieuf1041cd","txexmptint": "txexmptint","qualhlthplncd": "qualhlthplncd","qualhlthreqmntn": "qualhlthreqmntn","qualhlthonhnd": "qualhlthonhnd","rcvdpdtngcd": 
"rcvdpdtngcd","filedf7N0cd": "filedf720cd","totreprtabled": "totreprtabled","totcomprelatede": "totcomprelatede","totestcompf": "totestcompf","noindiv100kcnt": "noindiv100kcnt","nocontractor100kcnt": "nocontractor100kcnt","totcntrbgfts": "totcntrbgfts","prgmservcode2acd": "prgmservcode2acd","totrev2acola": "totrev2acola","prgmservcode2bcd": "prgmservcode2bcd","totrev2bcola": "totrev2bcola","prgmservcode2ccd": "prgmservcode2ccd","totrev2ccola": "totrev2ccola","prgmservcode2dcd": "prgmservcode2dcd","totrev2dcola": "totrev2dcola","prgmservcode2ecd": "prgmservcode2ecd","totrev2ecola": "totrev2ecola","totrev2fcola": "totrev2fcola","totprgmrevnue": "totprgmrevnue","invstmntinc": "invstmntinc","txexmptbndsproceeds": "txexmptbndsproceeds","royaltsinc": "royaltsinc","grsrntsreal": "grsrntsreal","grsrntsprsnl": "grsrntsprsnl","rntlexpnsreal": "rntlexpnsreal","rntlexpnsprsnl": "rntlexpnsprsnl","rntlincreal": "rntlincreal","rntlincprsnl": "rntlincprsnl","netrntlinc": "netrntlinc","grsalesecur": "grsalesecur","grsalesothr": "grsalesothr","cstbasisecur": "cstbasisecur","cstbasisothr": "cstbasisothr","gnlsecur": "gnlsecur","gnlsothr": "gnlsothr","netgnls": "netgnls","grsincfndrsng": "grsincfndrsng","lessdirfndrsng": "lessdirfndrsng","netincfndrsng": "netincfndrsng","grsincgaming": "grsincgaming","lessdirgaming": "lessdirgaming","netincgaming": "netincgaming","grsalesinvent": "grsalesinvent","lesscstofgoods": "lesscstofgoods","netincsales": "netincsales","miscrev11acd": "miscrev11acd","miscrevtota": "miscrevtota","miscrev11bcd": "miscrev11bcd","miscrevtot11b": "miscrevtot11b","miscrev11ccd": "miscrev11ccd","miscrevtot11c": "miscrevtot11c","miscrevtot11d": "miscrevtot11d","miscrevtot11e": "miscrevtot11e","totrevenue": "totrevenue","grntstogovt": "grntstogovt","grnsttoindiv": "grnsttoindiv","grntstofrgngovt": "grntstofrgngovt","benifitsmembrs": "benifitsmembrs","compnsatncurrofcr": "compnsatncurrofcr","compnsatnandothr": "compnsatnandothr","othrsalwages": 
"othrsalwages","pensionplancontrb": "pensionplancontrb","othremplyeebenef": "othremplyeebenef","payrolltx": "payrolltx","feesforsrvcmgmt": "feesforsrvcmgmt","legalfees": "legalfees","accntingfees": "accntingfees","feesforsrvclobby": "feesforsrvclobby","profndraising": "profndraising","feesforsrvcinvstmgmt": "feesforsrvcinvstmgmt","feesforsrvcothr": "feesforsrvcothr","advrtpromo": "advrtpromo","officexpns": "officexpns","infotech": "infotech","royaltsexpns": "royaltsexpns","occupancy": "occupancy","travel": "travel","travelofpublicoffcl": "travelofpublicoffcl","converconventmtng": "converconventmtng","interestamt": "interestamt","pymtoaffiliates": "pymtoaffiliates","deprcatndepletn": "deprcatndepletn","insurance": "insurance","othrexpnsa": "othrexpnsa","othrexpnsb": "othrexpnsb","othrexpnsc": "othrexpnsc","othrexpnsd": "othrexpnsd","othrexpnse": "othrexpnse","othrexpnsf": "othrexpnsf","totfuncexpns": "totfuncexpns","nonintcashend": "nonintcashend","svngstempinvend": "svngstempinvend","pldgegrntrcvblend": "pldgegrntrcvblend","accntsrcvblend": "accntsrcvblend","currfrmrcvblend": "currfrmrcvblend","rcvbldisqualend": "rcvbldisqualend","notesloansrcvblend": "notesloansrcvblend","invntriesalesend": "invntriesalesend","prepaidexpnsend": "prepaidexpnsend","lndbldgsequipend": "lndbldgsequipend","invstmntsend": "invstmntsend","invstmntsothrend": "invstmntsothrend","invstmntsprgmend": "invstmntsprgmend","intangibleassetsend": "intangibleassetsend","othrassetsend": "othrassetsend","totassetsend": "totassetsend","accntspayableend": "accntspayableend","grntspayableend": "grntspayableend","deferedrevnuend": "deferedrevnuend","txexmptbndsend": "txexmptbndsend","escrwaccntliabend": "escrwaccntliabend","paybletoffcrsend": "paybletoffcrsend","secrdmrtgsend": "secrdmrtgsend","unsecurednotesend": "unsecurednotesend","othrliabend": "othrliabend","totliabend": "totliabend","unrstrctnetasstsend": "unrstrctnetasstsend","temprstrctnetasstsend": 
"temprstrctnetasstsend","permrstrctnetasstsend": "permrstrctnetasstsend","capitalstktrstend": "capitalstktrstend","paidinsurplusend": "paidinsurplusend","retainedearnend": "retainedearnend","totnetassetend": "totnetassetend","totnetliabastend": "totnetliabastend","nonpfrea": "nonpfrea","totnooforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntsrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","exceeds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunreltd170": "netincunreltd170","othrinc170": "othrinc170","totsupp170": "totsupp170","grsrcptsrelated170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmissn509": "grsrcptsadmissn509","grsrcptsactivities509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","exceeds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunrelatd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_2017_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_2017/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_2017" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name : "ein" + type : "string" + mode : "required" + - name : "elf" + type : "string" + mode : "nullable" + - name : "tax_pd" + type : "integer" + mode : "nullable" + - name : "subseccd" + type : "integer" + mode : "nullable" + - name : "s501c3or4947a1cd" + type : "string" + mode : "nullable" + - name : "schdbind" + type : "string" + mode : "nullable" + - name : "politicalactvtscd" + type : "string" + mode : "nullable" + - name : "lbbyingactvtscd" + type : "string" + mode : "nullable" + - name : "subjto6033cd" + type : "string" + mode : "nullable" + - name : "dnradvisedfundscd" + type : "string" + mode : "nullable" + - name : "prptyintrcvdcd" + type : "string" + mode : "nullable" + - name : "maintwrkofartcd" + type : "string" + mode : "nullable" + - name : "crcounselingqstncd" + type : "string" + mode : "nullable" + - name : "hldassetsintermpermcd" + type : "string" + mode : "nullable" + - name : "rptlndbldgeqptcd" + type : "string" + mode : "nullable" + - name : "rptinvstothsecd" + type : "string" + mode : "nullable" + - name : "rptinvstprgrelcd" + type : "string" + mode : "nullable" + - name : "rptothasstcd" + type : "string" + mode : "nullable" + - name : "rptothliabcd" + type : 
"string" + mode : "nullable" + - name : "sepcnsldtfinstmtcd" + type : "string" + mode : "nullable" + - name : "sepindaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "inclinfinstmtcd" + type : "string" + mode : "nullable" + - name : "operateschools170cd" + type : "string" + mode : "nullable" + - name : "frgnofficecd" + type : "string" + mode : "nullable" + - name : "frgnrevexpnscd" + type : "string" + mode : "nullable" + - name : "frgngrntscd" + type : "string" + mode : "nullable" + - name : "frgnaggragrntscd" + type : "string" + mode : "nullable" + - name : "rptprofndrsngfeescd" + type : "string" + mode : "nullable" + - name : "rptincfnndrsngcd" + type : "string" + mode : "nullable" + - name : "rptincgamingcd" + type : "string" + mode : "nullable" + - name : "operatehosptlcd" + type : "string" + mode : "nullable" + - name : "hospaudfinstmtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstogovtcd" + type : "string" + mode : "nullable" + - name : "rptgrntstoindvcd" + type : "string" + mode : "nullable" + - name : "rptyestocompnstncd" + type : "string" + mode : "nullable" + - name : "txexmptbndcd" + type : "string" + mode : "nullable" + - name : "invstproceedscd" + type : "string" + mode : "nullable" + - name : "maintescrwaccntcd" + type : "string" + mode : "nullable" + - name : "actonbehalfcd" + type : "string" + mode : "nullable" + - name : "engageexcessbnftcd" + type : "string" + mode : "nullable" + - name : "awarexcessbnftcd" + type : "string" + mode : "nullable" + - name : "loantofficercd" + type : "string" + mode : "nullable" + - name : "grantoofficercd" + type : "string" + mode : "nullable" + - name : "dirbusnreltdcd" + type : "string" + mode : "nullable" + - name : "fmlybusnreltdcd" + type : "string" + mode : "nullable" + - name : "servasofficercd" + type : "string" + mode : "nullable" + - name : "recvnoncashcd" + type : "string" + mode : "nullable" + - name : "recvartcd" + type : "string" + mode : "nullable" + - name : 
"ceaseoperationscd" + type : "string" + mode : "nullable" + - name : "sellorexchcd" + type : "string" + mode : "nullable" + - name : "ownsepentcd" + type : "string" + mode : "nullable" + - name : "reltdorgcd" + type : "string" + mode : "nullable" + - name : "intincntrlcd" + type : "string" + mode : "nullable" + - name : "orgtrnsfrcd" + type : "string" + mode : "nullable" + - name : "conduct5percentcd" + type : "string" + mode : "nullable" + - name : "compltschocd" + type : "string" + mode : "nullable" + - name : "f1096cnt" + type : "integer" + mode : "nullable" + - name : "fw2gcnt" + type : "integer" + mode : "nullable" + - name : "wthldngrulescd" + type : "string" + mode : "nullable" + - name : "noemplyeesw3cnt" + type : "integer" + mode : "nullable" + - name : "filerqrdrtnscd" + type : "string" + mode : "nullable" + - name : "unrelbusinccd" + type : "string" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + mode : "nullable" + - name : "frgnacctcd" + type : "string" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + mode : "nullable" + - name : "prtynotifyorgcd" + type : "string" + mode : "nullable" + - name : "filedf8886tcd" + type : "string" + mode : "nullable" + - name : "solicitcntrbcd" + type : "string" + mode : "nullable" + - name : "exprstmntcd" + type : "string" + mode : "nullable" + - name : "providegoodscd" + type : "string" + mode : "nullable" + - name : "notfydnrvalcd" + type : "string" + mode : "nullable" + - name : "filedf8282cd" + type : "string" + mode : "nullable" + - name : "f8282cnt" + type : "integer" + mode : "nullable" + - name : "fndsrcvdcd" + type : "string" + mode : "nullable" + - name : "premiumspaidcd" + type : "string" + mode : "nullable" + - name : "filedf8899cd" + type : "string" + mode : "nullable" + - name : "filedf1098ccd" + type : "string" + mode : "nullable" + - name : "excbushldngscd" + type : "string" + mode : "nullable" + - name : "s4966distribcd" + type : "string" + mode : "nullable" + - 
name : "distribtodonorcd" + type : "string" + mode : "nullable" + - name : "initiationfees" + type : "integer" + mode : "nullable" + - name : "grsrcptspublicuse" + type : "integer" + mode : "nullable" + - name : "grsincmembers" + type : "integer" + mode : "nullable" + - name : "grsincother" + type : "integer" + mode : "nullable" + - name : "filedlieuf1041cd" + type : "string" + mode : "nullable" + - name : "txexmptint" + type : "integer" + mode : "nullable" + - name : "qualhlthplncd" + type : "string" + mode : "nullable" + - name : "qualhlthreqmntn" + type : "integer" + mode : "nullable" + - name : "qualhlthonhnd" + type : "integer" + mode : "nullable" + - name : "rcvdpdtngcd" + type : "string" + mode : "nullable" + - name : "filedf720cd" + type : "string" + mode : "nullable" + - name : "totreprtabled" + type : "integer" + mode : "nullable" + - name : "totcomprelatede" + type : "integer" + mode : "nullable" + - name : "totestcompf" + type : "integer" + mode : "nullable" + - name : "noindiv100kcnt" + type : "integer" + mode : "nullable" + - name : "nocontractor100kcnt" + type : "integer" + mode : "nullable" + - name : "totcntrbgfts" + type : "integer" + mode : "nullable" + - name : "prgmservcode2acd" + type : "integer" + mode : "nullable" + - name : "totrev2acola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2bcd" + type : "integer" + mode : "nullable" + - name : "totrev2bcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ccd" + type : "integer" + mode : "nullable" + - name : "totrev2ccola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2dcd" + type : "integer" + mode : "nullable" + - name : "totrev2dcola" + type : "integer" + mode : "nullable" + - name : "prgmservcode2ecd" + type : "integer" + mode : "nullable" + - name : "totrev2ecola" + type : "integer" + mode : "nullable" + - name : "totrev2fcola" + type : "integer" + mode : "nullable" + - name : "totprgmrevnue" + type : "integer" + mode : "nullable" + - name 
: "invstmntinc" + type : "integer" + mode : "nullable" + - name : "txexmptbndsproceeds" + type : "integer" + mode : "nullable" + - name : "royaltsinc" + type : "integer" + mode : "nullable" + - name : "grsrntsreal" + type : "integer" + mode : "nullable" + - name : "grsrntsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlexpnsreal" + type : "integer" + mode : "nullable" + - name : "rntlexpnsprsnl" + type : "integer" + mode : "nullable" + - name : "rntlincreal" + type : "integer" + mode : "nullable" + - name : "rntlincprsnl" + type : "integer" + mode : "nullable" + - name : "netrntlinc" + type : "integer" + mode : "nullable" + - name : "grsalesecur" + type : "integer" + mode : "nullable" + - name : "grsalesothr" + type : "integer" + mode : "nullable" + - name : "cstbasisecur" + type : "integer" + mode : "nullable" + - name : "cstbasisothr" + type : "integer" + mode : "nullable" + - name : "gnlsecur" + type : "integer" + mode : "nullable" + - name : "gnlsothr" + type : "integer" + mode : "nullable" + - name : "netgnls" + type : "integer" + mode : "nullable" + - name : "grsincfndrsng" + type : "integer" + mode : "nullable" + - name : "lessdirfndrsng" + type : "integer" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + mode : "nullable" + - name : "lessdirgaming" + type : "integer" + mode : "nullable" + - name : "netincgaming" + type : "integer" + mode : "nullable" + - name : "grsalesinvent" + type : "integer" + mode : "nullable" + - name : "lesscstofgoods" + type : "integer" + mode : "nullable" + - name : "netincsales" + type : "integer" + mode : "nullable" + - name : "miscrev11acd" + type : "integer" + mode : "nullable" + - name : "miscrevtota" + type : "integer" + mode : "nullable" + - name : "miscrev11bcd" + type : "integer" + mode : "nullable" + - name : "miscrevtot11b" + type : "integer" + mode : "nullable" + - name : "miscrev11ccd" + type : "integer" + mode : 
"nullable" + - name : "miscrevtot11c" + type : "integer" + mode : "nullable" + - name : "miscrevtot11d" + type : "integer" + mode : "nullable" + - name : "miscrevtot11e" + type : "integer" + mode : "nullable" + - name : "totrevenue" + type : "integer" + mode : "nullable" + - name : "grntstogovt" + type : "integer" + mode : "nullable" + - name : "grnsttoindiv" + type : "integer" + mode : "nullable" + - name : "grntstofrgngovt" + type : "integer" + mode : "nullable" + - name : "benifitsmembrs" + type : "integer" + mode : "nullable" + - name : "compnsatncurrofcr" + type : "integer" + mode : "nullable" + - name : "compnsatnandothr" + type : "integer" + mode : "nullable" + - name : "othrsalwages" + type : "integer" + mode : "nullable" + - name : "pensionplancontrb" + type : "integer" + mode : "nullable" + - name : "othremplyeebenef" + type : "integer" + mode : "nullable" + - name : "payrolltx" + type : "integer" + mode : "nullable" + - name : "feesforsrvcmgmt" + type : "integer" + mode : "nullable" + - name : "legalfees" + type : "integer" + mode : "nullable" + - name : "accntingfees" + type : "integer" + mode : "nullable" + - name : "feesforsrvclobby" + type : "integer" + mode : "nullable" + - name : "profndraising" + type : "integer" + mode : "nullable" + - name : "feesforsrvcinvstmgmt" + type : "integer" + mode : "nullable" + - name : "feesforsrvcothr" + type : "integer" + mode : "nullable" + - name : "advrtpromo" + type : "integer" + mode : "nullable" + - name : "officexpns" + type : "integer" + mode : "nullable" + - name : "infotech" + type : "integer" + mode : "nullable" + - name : "royaltsexpns" + type : "integer" + mode : "nullable" + - name : "occupancy" + type : "integer" + mode : "nullable" + - name : "travel" + type : "integer" + mode : "nullable" + - name : "travelofpublicoffcl" + type : "integer" + mode : "nullable" + - name : "converconventmtng" + type : "integer" + mode : "nullable" + - name : "interestamt" + type : "integer" + mode : "nullable" + - name 
: "pymtoaffiliates" + type : "integer" + mode : "nullable" + - name : "deprcatndepletn" + type : "integer" + mode : "nullable" + - name : "insurance" + type : "integer" + mode : "nullable" + - name : "othrexpnsa" + type : "integer" + mode : "nullable" + - name : "othrexpnsb" + type : "integer" + mode : "nullable" + - name : "othrexpnsc" + type : "integer" + mode : "nullable" + - name : "othrexpnsd" + type : "integer" + mode : "nullable" + - name : "othrexpnse" + type : "integer" + mode : "nullable" + - name : "othrexpnsf" + type : "integer" + mode : "nullable" + - name : "totfuncexpns" + type : "integer" + mode : "nullable" + - name : "nonintcashend" + type : "integer" + mode : "nullable" + - name : "svngstempinvend" + type : "integer" + mode : "nullable" + - name : "pldgegrntrcvblend" + type : "integer" + mode : "nullable" + - name : "accntsrcvblend" + type : "integer" + mode : "nullable" + - name : "currfrmrcvblend" + type : "integer" + mode : "nullable" + - name : "rcvbldisqualend" + type : "integer" + mode : "nullable" + - name : "notesloansrcvblend" + type : "integer" + mode : "nullable" + - name : "invntriesalesend" + type : "integer" + mode : "nullable" + - name : "prepaidexpnsend" + type : "integer" + mode : "nullable" + - name : "lndbldgsequipend" + type : "integer" + mode : "nullable" + - name : "invstmntsend" + type : "integer" + mode : "nullable" + - name : "invstmntsothrend" + type : "integer" + mode : "nullable" + - name : "invstmntsprgmend" + type : "integer" + mode : "nullable" + - name : "intangibleassetsend" + type : "integer" + mode : "nullable" + - name : "othrassetsend" + type : "integer" + mode : "nullable" + - name : "totassetsend" + type : "integer" + mode : "nullable" + - name : "accntspayableend" + type : "integer" + mode : "nullable" + - name : "grntspayableend" + type : "integer" + mode : "nullable" + - name : "deferedrevnuend" + type : "integer" + mode : "nullable" + - name : "txexmptbndsend" + type : "integer" + mode : "nullable" + - 
name : "escrwaccntliabend" + type : "integer" + mode : "nullable" + - name : "paybletoffcrsend" + type : "integer" + mode : "nullable" + - name : "secrdmrtgsend" + type : "integer" + mode : "nullable" + - name : "unsecurednotesend" + type : "integer" + mode : "nullable" + - name : "othrliabend" + type : "integer" + mode : "nullable" + - name : "totliabend" + type : "integer" + mode : "nullable" + - name : "unrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "temprstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "permrstrctnetasstsend" + type : "integer" + mode : "nullable" + - name : "capitalstktrstend" + type : "integer" + mode : "nullable" + - name : "paidinsurplusend" + type : "integer" + mode : "nullable" + - name : "retainedearnend" + type : "integer" + mode : "nullable" + - name : "totnetassetend" + type : "integer" + mode : "nullable" + - name : "totnetliabastend" + type : "integer" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + mode : "nullable" + - name : "totsupport" + type : "integer" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + mode : "nullable" + - name : "grsinc170" + type : "integer" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + mode : "nullable" + - name : "othrinc170" + type : "integer" + mode : "nullable" + - name : "totsupp170" + type : "integer" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + mode : "nullable" + - name : "totgftgrntrcvd509" + type 
: "integer" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + mode : "nullable" + - name : "grsinc509" + type : "integer" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + mode : "nullable" + - name : "othrinc509" + type : "integer" + mode : "nullable" + - name : "totsupp509" + type : "integer" + mode : "nullable" + + graph_paths: + - "irs_990_2017_transform_csv >> load_irs_990_2017_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py b/datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py new file mode 100644 index 000000000..140fcf2dc --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2014/irs_990_ez_2014_dag.py @@ -0,0 +1,495 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2014", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2014_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2014_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2014", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990ez.zip", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2014/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2014", + "CSV_HEADERS": 
'["ein","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2014_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2014_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2014/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2014", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + "type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { 
+ "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + 
"description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": "srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal 
(509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2014_transform_csv >> load_irs_990_ez_2014_to_bq diff --git a/datasets/irs_990/irs_990_ez_2014/pipeline.yaml b/datasets/irs_990/irs_990_ez_2014/pipeline.yaml new file mode 100644 index 000000000..e0dc5c342 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2014/pipeline.yaml @@ -0,0 +1,402 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2014 + + # Description of the table + description: "irs_990_ez_2014 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2014 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the task’s previous scheduled run hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2014_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2014" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. 
Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/14eofinextract990ez.zip" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2014/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2014" + CSV_HEADERS: >- + ["ein","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": 
"grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": 
"netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2014_to_bq" + + # The GCS bucket where the CSV file is located. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2014/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2014" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name : "ein" + type : "string" + description: "Employer Identification Number" + mode : "required" + - name : "tax_pd" + type : "integer" + description: "Tax period" + mode : "nullable" + - name : "subseccd" + type : "integer" + description: "Subsection code" + mode : "nullable" + - name : "totcntrbs" + type : "integer" + description: "Contributions gifts grants etc received" + mode : "nullable" + - name : "prgmservrev" + type : "integer" + description: "Program service revenue" + mode : "nullable" + - name : "duesassesmnts" + type : "integer" + description: "Membership dues and assessments" + mode : "nullable" + - name : "othrinvstinc" + type : "integer" + description: "Investment income" + mode : "nullable" + - name : "grsamtsalesastothr" + type : "integer" + description: "Gross amount from sale of assets" + mode : "nullable" + - name : "basisalesexpnsothr" + type : "integer" + description: "Cost or other basis and sales expenses" + mode : "nullable" + - name : "gnsaleofastothr" + type : "integer" + description: "Gain or (loss) from sale of assets" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + description: "Gross income from gaming" + mode : "nullable" + - name : "grsrevnuefndrsng" + type : "integer" + description: "Special events gross revenue" + mode : "nullable" + - name : "direxpns" + type : "integer" + description: "Special events direct expenses" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + description: "Special events net income (or loss)" + mode : "nullable" + - name : "grsalesminusret" + type : "integer" + description: "Gross sales of inventory" + mode : "nullable" + - name : "costgoodsold" + type : "integer" + description: "Less: cost of goods sold" + mode : "nullable" + - name : "grsprft" + type : "integer" + description: "Gross profit (or loss) from sales of inventory" + mode : "nullable" + - name : "othrevnue" + type : "integer" + description: "Other revenue - total" + mode : 
"nullable" + - name : "totrevnue" + type : "integer" + description: "Total revenue" + mode : "nullable" + - name : "totexpns" + type : "integer" + description: "Total expenses" + mode : "nullable" + - name : "totexcessyr" + type : "integer" + description: "Excess or deficit" + mode : "nullable" + - name : "othrchgsnetassetfnd" + type : "integer" + description: "Other changes in net assets" + mode : "nullable" + - name : "networthend" + type : "integer" + description: "Net assets EOY" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description: "Total assets e-o-y" + mode : "nullable" + - name : "totliabend" + type : "integer" + description: "Total liabilities e-o-y" + mode : "nullable" + - name : "totnetassetsend" + type : "integer" + description: "Total net worth e-o-y" + mode : "nullable" + - name : "actvtynotprevrptcd" + type : "string" + description: "Activity not previously reported?" + mode : "nullable" + - name : "chngsinorgcd" + type : "string" + description: "Significant changes to governing docs?" + mode : "nullable" + - name : "unrelbusincd" + type : "string" + description: "UBI over $1000?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description: "Organization Filed 990T" + mode : "nullable" + - name : "contractioncd" + type : "string" + description: "Liquidation dissolution termination or contraction" + mode : "nullable" + - name : "politicalexpend" + type : "integer" + description: "Direct or indirect political expenditures" + mode : "nullable" + - name : "filedf1120polcd" + type : "string" + description: "File Form 1120-POL?" + mode : "nullable" + - name : "loanstoofficerscd" + type : "string" + description: "Loans to/from officers directors or trustees?" 
+ mode : "nullable" + - name : "loanstoofficers" + type : "integer" + description: "Amount of loans to/from officers" + mode : "nullable" + - name : "initiationfee" + type : "integer" + description: "Initiation fees and capital contributions" + mode : "nullable" + - name : "grspublicrcpts" + type : "integer" + description: "Gross receipts for public use of club facilities" + mode : "nullable" + - name : "s4958excessbenefcd" + type : "string" + description: "Section 4958 excess benefit transactions?" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + description: "Party to a prohibited tax shelter transaction?" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + description: "Reason for non-PF status" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + description: "Number of organizations supported" + mode : "nullable" + - name : "totsupport" + type : "integer" + description: "Sum of amounts of support" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + description: "Gifts grants membership fees received (170)" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + description: "Tax revenues levied (170)" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + description: "Services or facilities furnished by gov (170)" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + description: "Public support subtotal (170)" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + description: "Amount support exceeds total (170)" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + description: "Public support (170)" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + description: "Public support from line 4 (170)" + mode : "nullable" + - name : "grsinc170" + type : "integer" + description: "Gross income from interest etc (170)" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + description: 
"Net UBI (170)" + mode : "nullable" + - name : "othrinc170" + type : "integer" + description: "Other income (170)" + mode : "nullable" + - name : "totsupp170" + type : "integer" + description: "Total support (170)" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + description: "Gross receipts from related activities (170)" + mode : "nullable" + - name : "totgftgrntrcvd509" + type : "integer" + description: "Gifts grants membership fees received (509)" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + description: "Receipts from admissions merchandise etc (509)" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + description: "Gross receipts from related activities (509)" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + description: "Tax revenues levied (509)" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + description: "Services or facilities furnished by gov (509)" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + description: "Amounts from disqualified persons (509)" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + description: "Amount support exceeds total (509)" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + description: "Public support (509)" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + description: "Public support from line 6 (509)" + mode : "nullable" + - name : "grsinc509" + type : "integer" + description: "Gross income from interest etc (509)" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + description: "Net UBI (509)" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + description: 
"Subtotal total support (509)" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + description: "Net income from UBI not in 10b (509)" + mode : "nullable" + - name : "othrinc509" + type : "integer" + description: "Other income (509)" + mode : "nullable" + - name : "totsupp509" + type : "integer" + description: "Total support (509)" + mode : "nullable" + + + graph_paths: + - "irs_990_ez_2014_transform_csv >> load_irs_990_ez_2014_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py b/datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py new file mode 100644 index 000000000..6e9e3cdb1 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2015/irs_990_ez_2015_dag.py @@ -0,0 +1,501 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2015", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2015_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2015_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2015", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextractEZ.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2015/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2015", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2015_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2015_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2015/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2015", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + 
"type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { + "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": 
"Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + "description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": 
"srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2015_transform_csv >> load_irs_990_ez_2015_to_bq diff --git a/datasets/irs_990/irs_990_ez_2015/pipeline.yaml b/datasets/irs_990/irs_990_ez_2015/pipeline.yaml new file mode 100644 index 000000000..b5ef42660 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2015/pipeline.yaml @@ -0,0 +1,406 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2015 + + # Description of the table + description: "irs_990_ez_2015 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2015 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2015_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2015" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. 
+ image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to run. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/15eofinextractEZ.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2015/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2015" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr":
"grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": 
"samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2015_to_bq" + + # The GCS bucket where the CSV file is located in. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2015/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2015" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name : "ein" + type : "string" + description: "Employer Identification Number" + mode : "required" + - name : "elf" + type : "string" + description: "E-file indicator" + mode : "nullable" + - name : "tax_pd" + type : "integer" + description: "Tax period" + mode : "nullable" + - name : "subseccd" + type : "integer" + description: "Subsection code" + mode : "nullable" + - name : "totcntrbs" + type : "integer" + description: "Contributions gifts grants etc received" + mode : "nullable" + - name : "prgmservrev" + type : "integer" + description: "Program service revenue" + mode : "nullable" + - name : "duesassesmnts" + type : "integer" + description: "Membership dues and assessments" + mode : "nullable" + - name : "othrinvstinc" + type : "integer" + description: "Investment income" + mode : "nullable" + - name : "grsamtsalesastothr" + type : "integer" + description: "Gross amount from sale of assets" + mode : "nullable" + - name : "basisalesexpnsothr" + type : "integer" + description: "Cost or other basis and sales expenses" + mode : "nullable" + - name : "gnsaleofastothr" + type : "integer" + description: "Gain or (loss) from sale of assets" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + description: "Gross income from gaming" + mode : "nullable" + - name : "grsrevnuefndrsng" + type : "integer" + description: "Special events gross revenue" + mode : "nullable" + - name : "direxpns" + type : "integer" + description: "Special events direct expenses" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + description: "Special events net income (or loss)" + mode : "nullable" + - name : "grsalesminusret" + type : "integer" + description: "Gross sales of inventory" + mode : "nullable" + - name : "costgoodsold" + type : "integer" + description: "Less: cost of goods sold" + mode : "nullable" + - name : "grsprft" + type : "integer" + description: "Gross profit (or loss) from sales of inventory" + mode : "nullable" + - name 
: "othrevnue" + type : "integer" + description: "Other revenue - total" + mode : "nullable" + - name : "totrevnue" + type : "integer" + description: "Total revenue" + mode : "nullable" + - name : "totexpns" + type : "integer" + description: "Total expenses" + mode : "nullable" + - name : "totexcessyr" + type : "integer" + description: "Excess or deficit" + mode : "nullable" + - name : "othrchgsnetassetfnd" + type : "integer" + description: "Other changes in net assets" + mode : "nullable" + - name : "networthend" + type : "integer" + description: "Net assets EOY" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description: "Total assets e-o-y" + mode : "nullable" + - name : "totliabend" + type : "integer" + description: "Total liabilities e-o-y" + mode : "nullable" + - name : "totnetassetsend" + type : "integer" + description: "Total net worth e-o-y" + mode : "nullable" + - name : "actvtynotprevrptcd" + type : "string" + description: "Activity not previously reported?" + mode : "nullable" + - name : "chngsinorgcd" + type : "string" + description: "Significant changes to governing docs?" + mode : "nullable" + - name : "unrelbusincd" + type : "string" + description: "UBI over $1000?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description: "Organization Filed 990T" + mode : "nullable" + - name : "contractioncd" + type : "string" + description: "Liquidation dissolution termination or contraction" + mode : "nullable" + - name : "politicalexpend" + type : "integer" + description: "Direct or indirect political expenditures" + mode : "nullable" + - name : "filedf1120polcd" + type : "string" + description: "File Form 1120-POL?" + mode : "nullable" + - name : "loanstoofficerscd" + type : "string" + description: "Loans to/from officers directors or trustees?" 
+ mode : "nullable" + - name : "loanstoofficers" + type : "integer" + description: "Amount of loans to/from officers" + mode : "nullable" + - name : "initiationfee" + type : "integer" + description: "Initiation fees and capital contributions" + mode : "nullable" + - name : "grspublicrcpts" + type : "integer" + description: "Gross receipts for public use of club facilities" + mode : "nullable" + - name : "s4958excessbenefcd" + type : "string" + description: "Section 4958 excess benefit transactions?" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + description: "Party to a prohibited tax shelter transaction?" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + description: "Reason for non-PF status" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + description: "Number of organizations supported" + mode : "nullable" + - name : "totsupport" + type : "integer" + description: "Sum of amounts of support" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + description: "Gifts grants membership fees received (170)" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + description: "Tax revenues levied (170)" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + description: "Services or facilities furnished by gov (170)" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + description: "Public support subtotal (170)" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + description: "Amount support exceeds total (170)" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + description: "Public support (170)" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + description: "Public support from line 4 (170)" + mode : "nullable" + - name : "grsinc170" + type : "integer" + description: "Gross income from interest etc (170)" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + description: 
"Net UBI (170)" + mode : "nullable" + - name : "othrinc170" + type : "integer" + description: "Other income (170)" + mode : "nullable" + - name : "totsupp170" + type : "integer" + description: "Total support (170)" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + description: "Gross receipts from related activities (170)" + mode : "nullable" + - name : "totgftgrntrcvd509" + type : "integer" + description: "Gifts grants membership fees received (509)" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + description: "Receipts from admissions merchandise etc (509)" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + description: "Gross receipts from related activities (509)" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + description: "Tax revenues levied (509)" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + description: "Services or facilities furnished by gov (509)" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + description: "Amounts from disqualified persons (509)" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + description: "Amount support exceeds total (509)" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + description: "Public support (509)" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + description: "Public support from line 6 (509)" + mode : "nullable" + - name : "grsinc509" + type : "integer" + description: "Gross income from interest etc (509)" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + description: "Net UBI (509)" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + description: 
"Subtotal total support (509)" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + description: "Net income from UBI not in 10b (509)" + mode : "nullable" + - name : "othrinc509" + type : "integer" + description: "Other income (509)" + mode : "nullable" + - name : "totsupp509" + type : "integer" + description: "Total support (509)" + mode : "nullable" + + + graph_paths: + - "irs_990_ez_2015_transform_csv >> load_irs_990_ez_2015_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py b/datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py new file mode 100644 index 000000000..f741c65a1 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2016/irs_990_ez_2016_dag.py @@ -0,0 +1,501 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2016", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2016_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2016_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2016", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextractez.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2016/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2016", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2016_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_ez_2016_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + 
source_objects=["data/irs_990/irs_990_ez_2016/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2016", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": "Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + 
"type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { + "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": 
"Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + "description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": "integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": 
"srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + "type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": 
"nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2016_transform_csv >> load_irs_990_ez_2016_to_bq diff --git a/datasets/irs_990/irs_990_ez_2016/pipeline.yaml b/datasets/irs_990/irs_990_ez_2016/pipeline.yaml new file mode 100644 index 000000000..7882ff022 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2016/pipeline.yaml @@ -0,0 +1,406 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2016 + + # Description of the table + description: "irs_990_ez_2016 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2016 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2016_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2016" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. 
+ image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/16eofinextractez.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2016/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2016" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": 
"grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": 
"samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2016_to_bq" + + # The GCS bucket where the CSV file is located in. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2016/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2016" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name : "ein" + type : "string" + description: "Employer Identification Number" + mode : "required" + - name : "elf" + type : "string" + description: "E-file indicator" + mode : "nullable" + - name : "tax_pd" + type : "integer" + description: "Tax period" + mode : "nullable" + - name : "subseccd" + type : "integer" + description: "Subsection code" + mode : "nullable" + - name : "totcntrbs" + type : "integer" + description: "Contributions gifts grants etc received" + mode : "nullable" + - name : "prgmservrev" + type : "integer" + description: "Program service revenue" + mode : "nullable" + - name : "duesassesmnts" + type : "integer" + description: "Membership dues and assessments" + mode : "nullable" + - name : "othrinvstinc" + type : "integer" + description: "Investment income" + mode : "nullable" + - name : "grsamtsalesastothr" + type : "integer" + description: "Gross amount from sale of assets" + mode : "nullable" + - name : "basisalesexpnsothr" + type : "integer" + description: "Cost or other basis and sales expenses" + mode : "nullable" + - name : "gnsaleofastothr" + type : "integer" + description: "Gain or (loss) from sale of assets" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + description: "Gross income from gaming" + mode : "nullable" + - name : "grsrevnuefndrsng" + type : "integer" + description: "Special events gross revenue" + mode : "nullable" + - name : "direxpns" + type : "integer" + description: "Special events direct expenses" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + description: "Special events net income (or loss)" + mode : "nullable" + - name : "grsalesminusret" + type : "integer" + description: "Gross sales of inventory" + mode : "nullable" + - name : "costgoodsold" + type : "integer" + description: "Less: cost of goods sold" + mode : "nullable" + - name : "grsprft" + type : "integer" + description: "Gross profit (or loss) from sales of inventory" + mode : "nullable" + - name 
: "othrevnue" + type : "integer" + description: "Other revenue - total" + mode : "nullable" + - name : "totrevnue" + type : "integer" + description: "Total revenue" + mode : "nullable" + - name : "totexpns" + type : "integer" + description: "Total expenses" + mode : "nullable" + - name : "totexcessyr" + type : "integer" + description: "Excess or deficit" + mode : "nullable" + - name : "othrchgsnetassetfnd" + type : "integer" + description: "Other changes in net assets" + mode : "nullable" + - name : "networthend" + type : "integer" + description: "Net assets EOY" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description: "Total assets e-o-y" + mode : "nullable" + - name : "totliabend" + type : "integer" + description: "Total liabilities e-o-y" + mode : "nullable" + - name : "totnetassetsend" + type : "integer" + description: "Total net worth e-o-y" + mode : "nullable" + - name : "actvtynotprevrptcd" + type : "string" + description: "Activity not previously reported?" + mode : "nullable" + - name : "chngsinorgcd" + type : "string" + description: "Significant changes to governing docs?" + mode : "nullable" + - name : "unrelbusincd" + type : "string" + description: "UBI over $1000?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description: "Organization Filed 990T" + mode : "nullable" + - name : "contractioncd" + type : "string" + description: "Liquidation dissolution termination or contraction" + mode : "nullable" + - name : "politicalexpend" + type : "integer" + description: "Direct or indirect political expenditures" + mode : "nullable" + - name : "filedf1120polcd" + type : "string" + description: "File Form 1120-POL?" + mode : "nullable" + - name : "loanstoofficerscd" + type : "string" + description: "Loans to/from officers directors or trustees?" 
+ mode : "nullable" + - name : "loanstoofficers" + type : "integer" + description: "Amount of loans to/from officers" + mode : "nullable" + - name : "initiationfee" + type : "integer" + description: "Initiation fees and capital contributions" + mode : "nullable" + - name : "grspublicrcpts" + type : "integer" + description: "Gross receipts for public use of club facilities" + mode : "nullable" + - name : "s4958excessbenefcd" + type : "string" + description: "Section 4958 excess benefit transactions?" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + description: "Party to a prohibited tax shelter transaction?" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + description: "Reason for non-PF status" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + description: "Number of organizations supported" + mode : "nullable" + - name : "totsupport" + type : "integer" + description: "Sum of amounts of support" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + description: "Gifts grants membership fees received (170)" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + description: "Tax revenues levied (170)" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + description: "Services or facilities furnished by gov (170)" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + description: "Public support subtotal (170)" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + description: "Amount support exceeds total (170)" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + description: "Public support (170)" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + description: "Public support from line 4 (170)" + mode : "nullable" + - name : "grsinc170" + type : "integer" + description: "Gross income from interest etc (170)" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + description: 
"Net UBI (170)" + mode : "nullable" + - name : "othrinc170" + type : "integer" + description: "Other income (170)" + mode : "nullable" + - name : "totsupp170" + type : "integer" + description: "Total support (170)" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + description: "Gross receipts from related activities (170)" + mode : "nullable" + - name : "totgftgrntrcvd509" + type : "integer" + description: "Gifts grants membership fees received (509)" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + description: "Receipts from admissions merchandise etc (509)" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + description: "Gross receipts from related activities (509)" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + description: "Tax revenues levied (509)" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + description: "Services or facilities furnished by gov (509)" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + description: "Amounts from disqualified persons (509)" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + description: "Amount support exceeds total (509)" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + description: "Public support (509)" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + description: "Public support from line 6 (509)" + mode : "nullable" + - name : "grsinc509" + type : "integer" + description: "Gross income from interest etc (509)" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + description: "Net UBI (509)" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + description: 
"Subtotal total support (509)" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + description: "Net income from UBI not in 10b (509)" + mode : "nullable" + - name : "othrinc509" + type : "integer" + description: "Other income (509)" + mode : "nullable" + - name : "totsupp509" + type : "integer" + description: "Total support (509)" + mode : "nullable" + + + graph_paths: + - "irs_990_ez_2016_transform_csv >> load_irs_990_ez_2016_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py b/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py new file mode 100644 index 000000000..c2d52e4c4 --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py @@ -0,0 +1,501 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_ez_2017", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_ez_2017_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_ez_2017_transform_csv", + startup_timeout_seconds=600, + name="irs_990_ez_2017", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/17eofinextractEZ.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2017/data_output.csv", + "PIPELINE_NAME": "irs_990_ez_2017", + "CSV_HEADERS": 
'["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": 
"totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfeecerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_ez_2017_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + 
task_id="load_irs_990_ez_2017_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_ez_2017/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_ez_2017", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_pd", + "type": "integer", + "description": "Tax period", + "mode": "nullable", + }, + { + "name": "subseccd", + "type": "integer", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "totcntrbs", + "type": "integer", + "description": "Contributions gifts grants etc received", + "mode": "nullable", + }, + { + "name": "prgmservrev", + "type": "integer", + "description": "Program service revenue", + "mode": "nullable", + }, + { + "name": "duesassesmnts", + "type": "integer", + "description": "Membership dues and assessments", + "mode": "nullable", + }, + { + "name": "othrinvstinc", + "type": "integer", + "description": "Investment income", + "mode": "nullable", + }, + { + "name": "grsamtsalesastothr", + "type": "integer", + "description": "Gross amount from sale of assets", + "mode": "nullable", + }, + { + "name": "basisalesexpnsothr", + "type": "integer", + "description": "Cost or other basis and sales expenses", + "mode": "nullable", + }, + { + "name": "gnsaleofastothr", + "type": "integer", + "description": "Gain or (loss) from sale of assets", + "mode": "nullable", + }, + { + "name": "grsincgaming", + "type": "integer", + "description": "Gross income from gaming", + "mode": "nullable", + }, + { + "name": "grsrevnuefndrsng", + "type": "integer", + "description": "Special events gross revenue", + "mode": "nullable", + }, + { + "name": "direxpns", + "type": "integer", + "description": 
"Special events direct expenses", + "mode": "nullable", + }, + { + "name": "netincfndrsng", + "type": "integer", + "description": "Special events net income (or loss)", + "mode": "nullable", + }, + { + "name": "grsalesminusret", + "type": "integer", + "description": "Gross sales of inventory", + "mode": "nullable", + }, + { + "name": "costgoodsold", + "type": "integer", + "description": "Less: cost of goods sold", + "mode": "nullable", + }, + { + "name": "grsprft", + "type": "integer", + "description": "Gross profit (or loss) from sales of inventory", + "mode": "nullable", + }, + { + "name": "othrevnue", + "type": "integer", + "description": "Other revenue - total", + "mode": "nullable", + }, + { + "name": "totrevnue", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "totexpns", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "totexcessyr", + "type": "integer", + "description": "Excess or deficit", + "mode": "nullable", + }, + { + "name": "othrchgsnetassetfnd", + "type": "integer", + "description": "Other changes in net assets", + "mode": "nullable", + }, + { + "name": "networthend", + "type": "integer", + "description": "Net assets EOY", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets e-o-y", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities e-o-y", + "mode": "nullable", + }, + { + "name": "totnetassetsend", + "type": "integer", + "description": "Total net worth e-o-y", + "mode": "nullable", + }, + { + "name": "actvtynotprevrptcd", + "type": "string", + "description": "Activity not previously reported?", + "mode": "nullable", + }, + { + "name": "chngsinorgcd", + "type": "string", + "description": "Significant changes to governing docs?", + "mode": "nullable", + }, + { + "name": "unrelbusincd", + "type": "string", + "description": "UBI over $1000?", 
+ "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Organization Filed 990T", + "mode": "nullable", + }, + { + "name": "contractioncd", + "type": "string", + "description": "Liquidation dissolution termination or contraction", + "mode": "nullable", + }, + { + "name": "politicalexpend", + "type": "integer", + "description": "Direct or indirect political expenditures", + "mode": "nullable", + }, + { + "name": "filedf1120polcd", + "type": "string", + "description": "File Form 1120-POL?", + "mode": "nullable", + }, + { + "name": "loanstoofficerscd", + "type": "string", + "description": "Loans to/from officers directors or trustees?", + "mode": "nullable", + }, + { + "name": "loanstoofficers", + "type": "integer", + "description": "Amount of loans to/from officers", + "mode": "nullable", + }, + { + "name": "initiationfee", + "type": "integer", + "description": "Initiation fees and capital contributions", + "mode": "nullable", + }, + { + "name": "grspublicrcpts", + "type": "integer", + "description": "Gross receipts for public use of club facilities", + "mode": "nullable", + }, + { + "name": "s4958excessbenefcd", + "type": "string", + "description": "Section 4958 excess benefit transactions?", + "mode": "nullable", + }, + { + "name": "prohibtdtxshltrcd", + "type": "string", + "description": "Party to a prohibited tax shelter transaction?", + "mode": "nullable", + }, + { + "name": "nonpfrea", + "type": "integer", + "description": "Reason for non-PF status", + "mode": "nullable", + }, + { + "name": "totnooforgscnt", + "type": "integer", + "description": "Number of organizations supported", + "mode": "nullable", + }, + { + "name": "totsupport", + "type": "integer", + "description": "Sum of amounts of support", + "mode": "nullable", + }, + { + "name": "gftgrntsrcvd170", + "type": "integer", + "description": "Gifts grants membership fees received (170)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied170", + "type": 
"integer", + "description": "Tax revenues levied (170)", + "mode": "nullable", + }, + { + "name": "srvcsval170", + "type": "integer", + "description": "Services or facilities furnished by gov (170)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot170", + "type": "integer", + "description": "Public support subtotal (170)", + "mode": "nullable", + }, + { + "name": "exceeds2pct170", + "type": "integer", + "description": "Amount support exceeds total (170)", + "mode": "nullable", + }, + { + "name": "pubsupplesspct170", + "type": "integer", + "description": "Public support (170)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot170", + "type": "integer", + "description": "Public support from line 4 (170)", + "mode": "nullable", + }, + { + "name": "grsinc170", + "type": "integer", + "description": "Gross income from interest etc (170)", + "mode": "nullable", + }, + { + "name": "netincunreltd170", + "type": "integer", + "description": "Net UBI (170)", + "mode": "nullable", + }, + { + "name": "othrinc170", + "type": "integer", + "description": "Other income (170)", + "mode": "nullable", + }, + { + "name": "totsupp170", + "type": "integer", + "description": "Total support (170)", + "mode": "nullable", + }, + { + "name": "grsrcptsrelated170", + "type": "integer", + "description": "Gross receipts from related activities (170)", + "mode": "nullable", + }, + { + "name": "totgftgrntrcvd509", + "type": "integer", + "description": "Gifts grants membership fees received (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsadmissn509", + "type": "integer", + "description": "Receipts from admissions merchandise etc (509)", + "mode": "nullable", + }, + { + "name": "grsrcptsactivities509", + "type": "integer", + "description": "Gross receipts from related activities (509)", + "mode": "nullable", + }, + { + "name": "txrevnuelevied509", + "type": "integer", + "description": "Tax revenues levied (509)", + "mode": "nullable", + }, + { + "name": "srvcsval509", + 
"type": "integer", + "description": "Services or facilities furnished by gov (509)", + "mode": "nullable", + }, + { + "name": "pubsuppsubtot509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "rcvdfrmdisqualsub509", + "type": "integer", + "description": "Amounts from disqualified persons (509)", + "mode": "nullable", + }, + { + "name": "exceeds1pct509", + "type": "integer", + "description": "Amount support exceeds total (509)", + "mode": "nullable", + }, + { + "name": "subtotpub509", + "type": "integer", + "description": "Public support subtotal (509)", + "mode": "nullable", + }, + { + "name": "pubsupplesub509", + "type": "integer", + "description": "Public support (509)", + "mode": "nullable", + }, + { + "name": "samepubsuppsubtot509", + "type": "integer", + "description": "Public support from line 6 (509)", + "mode": "nullable", + }, + { + "name": "grsinc509", + "type": "integer", + "description": "Gross income from interest etc (509)", + "mode": "nullable", + }, + { + "name": "unreltxincls511tx509", + "type": "integer", + "description": "Net UBI (509)", + "mode": "nullable", + }, + { + "name": "subtotsuppinc509", + "type": "integer", + "description": "Subtotal total support (509)", + "mode": "nullable", + }, + { + "name": "netincunrelatd509", + "type": "integer", + "description": "Net income from UBI not in 10b (509)", + "mode": "nullable", + }, + { + "name": "othrinc509", + "type": "integer", + "description": "Other income (509)", + "mode": "nullable", + }, + { + "name": "totsupp509", + "type": "integer", + "description": "Total support (509)", + "mode": "nullable", + }, + ], + ) + + irs_990_ez_2017_transform_csv >> load_irs_990_ez_2017_to_bq diff --git a/datasets/irs_990/irs_990_ez_2017/pipeline.yaml b/datasets/irs_990/irs_990_ez_2017/pipeline.yaml new file mode 100644 index 000000000..5179d8efe --- /dev/null +++ b/datasets/irs_990/irs_990_ez_2017/pipeline.yaml @@ -0,0 +1,406 @@ +# Copyright 
2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_ez_2017 + + # Description of the table + description: "irs_990_ez_2017 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_ez_2017 + default_args: + owner: "Google" + + # When set to True, keeps a task from being triggered if the task's previous scheduled run has not succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_ez_2017_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_ez_2017" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
+ image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/17eofinextractEZ.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_ez_2017/data_output.csv" + PIPELINE_NAME: "irs_990_ez_2017" + CSV_HEADERS: >- + ["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"] + RENAME_MAPPINGS: >- + {"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": 
"grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": 
"samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + + task_id: "load_irs_990_ez_2017_to_bq" + + # The GCS bucket where the CSV file is located in. + bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_ez_2017/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_ez_2017" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. 
+ + schema_fields: + - name : "ein" + type : "string" + description: "Employer Identification Number" + mode : "required" + - name : "elf" + type : "string" + description: "E-file indicator" + mode : "nullable" + - name : "tax_pd" + type : "integer" + description: "Tax period" + mode : "nullable" + - name : "subseccd" + type : "integer" + description: "Subsection code" + mode : "nullable" + - name : "totcntrbs" + type : "integer" + description: "Contributions gifts grants etc received" + mode : "nullable" + - name : "prgmservrev" + type : "integer" + description: "Program service revenue" + mode : "nullable" + - name : "duesassesmnts" + type : "integer" + description: "Membership dues and assessments" + mode : "nullable" + - name : "othrinvstinc" + type : "integer" + description: "Investment income" + mode : "nullable" + - name : "grsamtsalesastothr" + type : "integer" + description: "Gross amount from sale of assets" + mode : "nullable" + - name : "basisalesexpnsothr" + type : "integer" + description: "Cost or other basis and sales expenses" + mode : "nullable" + - name : "gnsaleofastothr" + type : "integer" + description: "Gain or (loss) from sale of assets" + mode : "nullable" + - name : "grsincgaming" + type : "integer" + description: "Gross income from gaming" + mode : "nullable" + - name : "grsrevnuefndrsng" + type : "integer" + description: "Special events gross revenue" + mode : "nullable" + - name : "direxpns" + type : "integer" + description: "Special events direct expenses" + mode : "nullable" + - name : "netincfndrsng" + type : "integer" + description: "Special events net income (or loss)" + mode : "nullable" + - name : "grsalesminusret" + type : "integer" + description: "Gross sales of inventory" + mode : "nullable" + - name : "costgoodsold" + type : "integer" + description: "Less: cost of goods sold" + mode : "nullable" + - name : "grsprft" + type : "integer" + description: "Gross profit (or loss) from sales of inventory" + mode : "nullable" + - name 
: "othrevnue" + type : "integer" + description: "Other revenue - total" + mode : "nullable" + - name : "totrevnue" + type : "integer" + description: "Total revenue" + mode : "nullable" + - name : "totexpns" + type : "integer" + description: "Total expenses" + mode : "nullable" + - name : "totexcessyr" + type : "integer" + description: "Excess or deficit" + mode : "nullable" + - name : "othrchgsnetassetfnd" + type : "integer" + description: "Other changes in net assets" + mode : "nullable" + - name : "networthend" + type : "integer" + description: "Net assets EOY" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description: "Total assets e-o-y" + mode : "nullable" + - name : "totliabend" + type : "integer" + description: "Total liabilities e-o-y" + mode : "nullable" + - name : "totnetassetsend" + type : "integer" + description: "Total net worth e-o-y" + mode : "nullable" + - name : "actvtynotprevrptcd" + type : "string" + description: "Activity not previously reported?" + mode : "nullable" + - name : "chngsinorgcd" + type : "string" + description: "Significant changes to governing docs?" + mode : "nullable" + - name : "unrelbusincd" + type : "string" + description: "UBI over $1000?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description: "Organization Filed 990T" + mode : "nullable" + - name : "contractioncd" + type : "string" + description: "Liquidation dissolution termination or contraction" + mode : "nullable" + - name : "politicalexpend" + type : "integer" + description: "Direct or indirect political expenditures" + mode : "nullable" + - name : "filedf1120polcd" + type : "string" + description: "File Form 1120-POL?" + mode : "nullable" + - name : "loanstoofficerscd" + type : "string" + description: "Loans to/from officers directors or trustees?" 
+ mode : "nullable" + - name : "loanstoofficers" + type : "integer" + description: "Amount of loans to/from officers" + mode : "nullable" + - name : "initiationfee" + type : "integer" + description: "Initiation fees and capital contributions" + mode : "nullable" + - name : "grspublicrcpts" + type : "integer" + description: "Gross receipts for public use of club facilities" + mode : "nullable" + - name : "s4958excessbenefcd" + type : "string" + description: "Section 4958 excess benefit transactions?" + mode : "nullable" + - name : "prohibtdtxshltrcd" + type : "string" + description: "Party to a prohibited tax shelter transaction?" + mode : "nullable" + - name : "nonpfrea" + type : "integer" + description: "Reason for non-PF status" + mode : "nullable" + - name : "totnooforgscnt" + type : "integer" + description: "Number of organizations supported" + mode : "nullable" + - name : "totsupport" + type : "integer" + description: "Sum of amounts of support" + mode : "nullable" + - name : "gftgrntsrcvd170" + type : "integer" + description: "Gifts grants membership fees received (170)" + mode : "nullable" + - name : "txrevnuelevied170" + type : "integer" + description: "Tax revenues levied (170)" + mode : "nullable" + - name : "srvcsval170" + type : "integer" + description: "Services or facilities furnished by gov (170)" + mode : "nullable" + - name : "pubsuppsubtot170" + type : "integer" + description: "Public support subtotal (170)" + mode : "nullable" + - name : "exceeds2pct170" + type : "integer" + description: "Amount support exceeds total (170)" + mode : "nullable" + - name : "pubsupplesspct170" + type : "integer" + description: "Public support (170)" + mode : "nullable" + - name : "samepubsuppsubtot170" + type : "integer" + description: "Public support from line 4 (170)" + mode : "nullable" + - name : "grsinc170" + type : "integer" + description: "Gross income from interest etc (170)" + mode : "nullable" + - name : "netincunreltd170" + type : "integer" + description: 
"Net UBI (170)" + mode : "nullable" + - name : "othrinc170" + type : "integer" + description: "Other income (170)" + mode : "nullable" + - name : "totsupp170" + type : "integer" + description: "Total support (170)" + mode : "nullable" + - name : "grsrcptsrelated170" + type : "integer" + description: "Gross receipts from related activities (170)" + mode : "nullable" + - name : "totgftgrntrcvd509" + type : "integer" + description: "Gifts grants membership fees received (509)" + mode : "nullable" + - name : "grsrcptsadmissn509" + type : "integer" + description: "Receipts from admissions merchandise etc (509)" + mode : "nullable" + - name : "grsrcptsactivities509" + type : "integer" + description: "Gross receipts from related activities (509)" + mode : "nullable" + - name : "txrevnuelevied509" + type : "integer" + description: "Tax revenues levied (509)" + mode : "nullable" + - name : "srvcsval509" + type : "integer" + description: "Services or facilities furnished by gov (509)" + mode : "nullable" + - name : "pubsuppsubtot509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "rcvdfrmdisqualsub509" + type : "integer" + description: "Amounts from disqualified persons (509)" + mode : "nullable" + - name : "exceeds1pct509" + type : "integer" + description: "Amount support exceeds total (509)" + mode : "nullable" + - name : "subtotpub509" + type : "integer" + description: "Public support subtotal (509)" + mode : "nullable" + - name : "pubsupplesub509" + type : "integer" + description: "Public support (509)" + mode : "nullable" + - name : "samepubsuppsubtot509" + type : "integer" + description: "Public support from line 6 (509)" + mode : "nullable" + - name : "grsinc509" + type : "integer" + description: "Gross income from interest etc (509)" + mode : "nullable" + - name : "unreltxincls511tx509" + type : "integer" + description: "Net UBI (509)" + mode : "nullable" + - name : "subtotsuppinc509" + type : "integer" + description: 
"Subtotal total support (509)" + mode : "nullable" + - name : "netincunrelatd509" + type : "integer" + description: "Net income from UBI not in 10b (509)" + mode : "nullable" + - name : "othrinc509" + type : "integer" + description: "Other income (509)" + mode : "nullable" + - name : "totsupp509" + type : "integer" + description: "Total support (509)" + mode : "nullable" + + + graph_paths: + - "irs_990_ez_2017_transform_csv >> load_irs_990_ez_2017_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py b/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py new file mode 100644 index 000000000..7660d0d46 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py @@ -0,0 +1,1137 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_pf_2014", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_pf_2014_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_pf_2014_transform_csv", + startup_timeout_seconds=600, + name="irs_990_pf_2014", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990pf.zip", + "SOURCE_FILE": "files/data.zip", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_pf_2014/data_output.csv", + "PIPELINE_NAME": "irs_990_pf_2014", + "CSV_HEADERS": 
'["ein","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","acqdrindrintcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsu
prttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"]', + "RENAME_MAPPINGS": '{"EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": 
"mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","ACQDRINDRINTCD": "acqdrindrintcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": 
"adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": 
"rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"}', + }, + resources={"request_memory": "4G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_pf_2014_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_pf_2014_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_pf_2014/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_pf_2014", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "tax_prd", + "type": "string", + "description": "Tax period (YYYYMM format)", + "mode": "nullable", + }, + { + "name": "eostatus", + "type": "string", + "description": "EO Status Code", + "mode": "nullable", + }, + { + "name": "tax_yr", + "type": "integer", + "description": "SOI Year", + "mode": "nullable", + }, + { + "name": "operatingcd", + "type": "string", + "description": "Operating foundation code", + "mode": "nullable", + }, + { + "name": "subcd", + "type": "string", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "fairmrktvalamt", + "type": "integer", + "description": "Total assets – e-o-y fair market valu", + "mode": "nullable", + }, + { + "name": "grscontrgifts", + "type": "integer", + "description": "Contributions received", + "mode": "nullable", + }, + { + "name": "schedbind", + "type": "string", + "description": "Schedule B indicator", + "mode": "nullable", + }, + { + "name": "intrstrvnue", + "type": "integer", + "description": "Interest revenue", + "mode": "nullable", + }, + { + "name": "dividndsamt", + "type": "integer", + "description": "", + "mode": "nullable", + }, + { + "name": "grsrents", + "type": "integer", + "description": "Gross 
rents", + "mode": "nullable", + }, + { + "name": "grsslspramt", + "type": "integer", + "description": "Gross sales price for assets", + "mode": "nullable", + }, + { + "name": "costsold", + "type": "integer", + "description": "Cost-of-goods-sold", + "mode": "nullable", + }, + { + "name": "grsprofitbus", + "type": "integer", + "description": "Gross profit", + "mode": "nullable", + }, + { + "name": "otherincamt", + "type": "integer", + "description": "Other income", + "mode": "nullable", + }, + { + "name": "totrcptperbks", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "compofficers", + "type": "integer", + "description": "Compensation of officers", + "mode": "nullable", + }, + { + "name": "pensplemplbenf", + "type": "integer", + "description": "Pension plans employee benefits", + "mode": "nullable", + }, + { + "name": "legalfeesamt", + "type": "integer", + "description": "Legal fees", + "mode": "nullable", + }, + { + "name": "accountingfees", + "type": "integer", + "description": "Accounting fees", + "mode": "nullable", + }, + { + "name": "interestamt", + "type": "integer", + "description": "Interest", + "mode": "nullable", + }, + { + "name": "depreciationamt", + "type": "integer", + "description": "Depreciation and depletion", + "mode": "nullable", + }, + { + "name": "occupancyamt", + "type": "integer", + "description": "Occupancy", + "mode": "nullable", + }, + { + "name": "travlconfmtngs", + "type": "integer", + "description": "Travel conferences and meetings", + "mode": "nullable", + }, + { + "name": "printingpubl", + "type": "integer", + "description": "Printing and publications", + "mode": "nullable", + }, + { + "name": "topradmnexpnsa", + "type": "integer", + "description": "Total operating and administrative expenses column a", + "mode": "nullable", + }, + { + "name": "contrpdpbks", + "type": "integer", + "description": "Contributions gifts grants paid", + "mode": "nullable", + }, + { + "name": "totexpnspbks", + 
"type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "excessrcpts", + "type": "integer", + "description": "Net income less deficit", + "mode": "nullable", + }, + { + "name": "totrcptnetinc", + "type": "integer", + "description": "Total receipts net investment income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsb", + "type": "integer", + "description": "Total operating and administrative expenses column b", + "mode": "nullable", + }, + { + "name": "totexpnsnetinc", + "type": "integer", + "description": "Total expenses net investment income", + "mode": "nullable", + }, + { + "name": "netinvstinc", + "type": "integer", + "description": "Net investment income", + "mode": "nullable", + }, + { + "name": "trcptadjnetinc", + "type": "integer", + "description": "Total receipts adjusted net income", + "mode": "nullable", + }, + { + "name": "totexpnsadjnet", + "type": "integer", + "description": "Total expenses adjusted net income", + "mode": "nullable", + }, + { + "name": "adjnetinc", + "type": "integer", + "description": "Adjusted net income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsd", + "type": "integer", + "description": "Total operating and administrative expenses column d", + "mode": "nullable", + }, + { + "name": "totexpnsexempt", + "type": "integer", + "description": "Total expenses – exempt purpose", + "mode": "nullable", + }, + { + "name": "othrcashamt", + "type": "integer", + "description": "Cash non-interest-bearing – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstgovtoblig", + "type": "integer", + "description": "Investments in U.S. 
& state government obligations – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpstk", + "type": "integer", + "description": "Investments in corporate stock – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpbnd", + "type": "integer", + "description": "Investments in corporate bonds– e-o-y book value", + "mode": "nullable", + }, + { + "name": "totinvstsec", + "type": "integer", + "description": "Total investments in securities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgloans", + "type": "integer", + "description": "Investments mortgage loans – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrinvstend", + "type": "integer", + "description": "Other investments – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrassetseoy", + "type": "integer", + "description": "Other assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgnotespay", + "type": "integer", + "description": "Mortgage loans payable – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrliabltseoy", + "type": "integer", + "description": "Other liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "tfundnworth", + "type": "integer", + "description": "Total fund net worth – e-o-y book value", + "mode": "nullable", + }, + { + "name": "fairmrktvaleoy", + "type": "integer", + "description": "Total assets – e-o-y fair market value", + "mode": "nullable", + }, + { + "name": "totexcapgnls", + "type": "integer", + "description": "Capital gain net income", + "mode": "nullable", + }, + { + "name": "totexcapgn", + "type": "integer", + "description": "Net gain – sales of assets", + "mode": "nullable", + }, + { + 
"name": "totexcapls", + "type": "integer", + "description": "Net loss – sales of assets", + "mode": "nullable", + }, + { + "name": "invstexcisetx", + "type": "integer", + "description": "Excise tax on net investment income", + "mode": "nullable", + }, + { + "name": "sec4940notxcd", + "type": "string", + "description": "Section 4940 – no tax", + "mode": "nullable", + }, + { + "name": "sec4940redtxcd", + "type": "string", + "description": "Section 4940 – 1 % tax", + "mode": "nullable", + }, + { + "name": "sect511tx", + "type": "integer", + "description": "Section 511 tax", + "mode": "nullable", + }, + { + "name": "subtitleatx", + "type": "integer", + "description": "Subtitle A tax", + "mode": "nullable", + }, + { + "name": "totaxpyr", + "type": "integer", + "description": "Total excise tax", + "mode": "nullable", + }, + { + "name": "esttaxcr", + "type": "integer", + "description": "Estimated tax credit", + "mode": "nullable", + }, + { + "name": "txwithldsrc", + "type": "integer", + "description": "Tax withheld at source", + "mode": "nullable", + }, + { + "name": "txpaidf2758", + "type": "integer", + "description": "Tax paid with Form 2758 (filing extension)", + "mode": "nullable", + }, + { + "name": "erronbkupwthld", + "type": "integer", + "description": "Erroneous backup withholding credit amount", + "mode": "nullable", + }, + { + "name": "estpnlty", + "type": "integer", + "description": "Estimated tax penalty", + "mode": "nullable", + }, + { + "name": "taxdue", + "type": "integer", + "description": "Tax due", + "mode": "nullable", + }, + { + "name": "overpay", + "type": "integer", + "description": "Overpayment", + "mode": "nullable", + }, + { + "name": "crelamt", + "type": "integer", + "description": "Credit elect amount", + "mode": "nullable", + }, + { + "name": "infleg", + "type": "string", + "description": "Influence legislation?", + "mode": "nullable", + }, + { + "name": "actnotpr", + "type": "string", + "description": "Activities not previously reported?", + 
"mode": "nullable", + }, + { + "name": "chgnprvrptcd", + "type": "string", + "description": "Changes not previously reported?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Filed 990-T?", + "mode": "nullable", + }, + { + "name": "contractncd", + "type": "string", + "description": "Contraction?", + "mode": "nullable", + }, + { + "name": "furnishcpycd", + "type": "string", + "description": "Furnished copy to Attorney General?", + "mode": "nullable", + }, + { + "name": "claimstatcd", + "type": "string", + "description": "Claiming status?", + "mode": "nullable", + }, + { + "name": "cntrbtrstxyrcd", + "type": "string", + "description": "Substantial contributors?", + "mode": "nullable", + }, + { + "name": "acqdrindrintcd", + "type": "string", + "description": "Distribution to donor advised fund with advisory privileges?", + "mode": "nullable", + }, + { + "name": "orgcmplypubcd", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "filedlf1041ind", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "propexchcd", + "type": "string", + "description": "Property exchange?", + "mode": "nullable", + }, + { + "name": "brwlndmnycd", + "type": "string", + "description": "Borrow lend money?", + "mode": "nullable", + }, + { + "name": "furngoodscd", + "type": "string", + "description": "Furnished goods?", + "mode": "nullable", + }, + { + "name": "paidcmpncd", + "type": "string", + "description": "Paid compensation?", + "mode": "nullable", + }, + { + "name": "transfercd", + "type": "string", + "description": "Transfer?", + "mode": "nullable", + }, + { + "name": "agremkpaycd", + "type": "string", + "description": "Agree to make pay?", + "mode": "nullable", + }, + { + "name": "exceptactsind", + "type": "string", + "description": "Acts fail to qualify under section 53.4941(d)-3?", + "mode": "nullable", + }, + { + 
"name": "prioractvcd", + "type": "string", + "description": "Engage in acts in prior year?", + "mode": "nullable", + }, + { + "name": "undistrinccd", + "type": "string", + "description": "Undistributed income?", + "mode": "nullable", + }, + { + "name": "applyprovind", + "type": "string", + "description": "Not applying section 4942(a)(2) provisions?", + "mode": "nullable", + }, + { + "name": "dirindirintcd", + "type": "string", + "description": "Direct indirect interest?", + "mode": "nullable", + }, + { + "name": "excesshldcd", + "type": "string", + "description": "Excess business holdings?", + "mode": "nullable", + }, + { + "name": "invstjexmptcd", + "type": "string", + "description": "Jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "prevjexmptcd", + "type": "string", + "description": "Prior year jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "propgndacd", + "type": "string", + "description": "Propaganda?", + "mode": "nullable", + }, + { + "name": "ipubelectcd", + "type": "string", + "description": "Influence public election?", + "mode": "nullable", + }, + { + "name": "grntindivcd", + "type": "string", + "description": "Grant individual?", + "mode": "nullable", + }, + { + "name": "nchrtygrntcd", + "type": "string", + "description": "Non-charity grant?", + "mode": "nullable", + }, + { + "name": "nreligiouscd", + "type": "string", + "description": "Non-religious?", + "mode": "nullable", + }, + { + "name": "excptransind", + "type": "string", + "description": "Transactions fail to qualify under section 53.4945?", + "mode": "nullable", + }, + { + "name": "rfprsnlbnftind", + "type": "string", + "description": "Receive funds to pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "pyprsnlbnftind", + "type": "string", + "description": "Pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "tfairmrktunuse", + "type": "integer", + "description": "Fair market value of 
assets not used for charitable purposes", + "mode": "nullable", + }, + { + "name": "valncharitassets", + "type": "integer", + "description": "Net value of noncharitable-use assets", + "mode": "nullable", + }, + { + "name": "cmpmininvstret", + "type": "integer", + "description": "Minimum investment return", + "mode": "nullable", + }, + { + "name": "distribamt", + "type": "integer", + "description": "Distributable amount", + "mode": "nullable", + }, + { + "name": "undistribincyr", + "type": "integer", + "description": "Undistributed income", + "mode": "nullable", + }, + { + "name": "adjnetinccola", + "type": "integer", + "description": "Adjusted net income column a", + "mode": "nullable", + }, + { + "name": "adjnetinccolb", + "type": "integer", + "description": "Adjusted net income column b", + "mode": "nullable", + }, + { + "name": "adjnetinccolc", + "type": "integer", + "description": "Adjusted net income column c", + "mode": "nullable", + }, + { + "name": "adjnetinccold", + "type": "integer", + "description": "Adjusted net income column d", + "mode": "nullable", + }, + { + "name": "adjnetinctot", + "type": "integer", + "description": "Adjusted net income total", + "mode": "nullable", + }, + { + "name": "qlfydistriba", + "type": "integer", + "description": "Qualifying distributions column a", + "mode": "nullable", + }, + { + "name": "qlfydistribb", + "type": "integer", + "description": "Qualifying distributions column b", + "mode": "nullable", + }, + { + "name": "qlfydistribc", + "type": "integer", + "description": "Qualifying distributions column c", + "mode": "nullable", + }, + { + "name": "qlfydistribd", + "type": "integer", + "description": "Qualifying distributions column d", + "mode": "nullable", + }, + { + "name": "qlfydistribtot", + "type": "integer", + "description": "Qualifying distributions total", + "mode": "nullable", + }, + { + "name": "valassetscola", + "type": "integer", + "description": "Value assets column a", + "mode": "nullable", + }, + { + 
"name": "valassetscolb", + "type": "integer", + "description": "Value assets column b", + "mode": "nullable", + }, + { + "name": "valassetscolc", + "type": "integer", + "description": "Value assets column c", + "mode": "nullable", + }, + { + "name": "valassetscold", + "type": "integer", + "description": "Value assets column d", + "mode": "nullable", + }, + { + "name": "valassetstot", + "type": "integer", + "description": "Value assets total", + "mode": "nullable", + }, + { + "name": "qlfyasseta", + "type": "integer", + "description": "Qualifying assets column a", + "mode": "nullable", + }, + { + "name": "qlfyassetb", + "type": "integer", + "description": "Qualifying assets column b", + "mode": "nullable", + }, + { + "name": "qlfyassetc", + "type": "integer", + "description": "Qualifying assets column c", + "mode": "nullable", + }, + { + "name": "qlfyassetd", + "type": "integer", + "description": "Qualifying assets column d", + "mode": "nullable", + }, + { + "name": "qlfyassettot", + "type": "integer", + "description": "Qualifying assets total", + "mode": "nullable", + }, + { + "name": "endwmntscola", + "type": "integer", + "description": "Endowments column a", + "mode": "nullable", + }, + { + "name": "endwmntscolb", + "type": "integer", + "description": "Endowments column b", + "mode": "nullable", + }, + { + "name": "endwmntscolc", + "type": "integer", + "description": "Endowments column c", + "mode": "nullable", + }, + { + "name": "endwmntscold", + "type": "integer", + "description": "Endowments column d", + "mode": "nullable", + }, + { + "name": "endwmntstot", + "type": "integer", + "description": "Endowments total", + "mode": "nullable", + }, + { + "name": "totsuprtcola", + "type": "integer", + "description": "Total support column a", + "mode": "nullable", + }, + { + "name": "totsuprtcolb", + "type": "integer", + "description": "Total support column b", + "mode": "nullable", + }, + { + "name": "totsuprtcolc", + "type": "integer", + "description": "Total support 
column c", + "mode": "nullable", + }, + { + "name": "totsuprtcold", + "type": "integer", + "description": "Total support column d", + "mode": "nullable", + }, + { + "name": "totsuprttot", + "type": "integer", + "description": "Total support total", + "mode": "nullable", + }, + { + "name": "pubsuprtcola", + "type": "integer", + "description": "Public support column a", + "mode": "nullable", + }, + { + "name": "pubsuprtcolb", + "type": "integer", + "description": "Public support column b", + "mode": "nullable", + }, + { + "name": "pubsuprtcolc", + "type": "integer", + "description": "Public support column c", + "mode": "nullable", + }, + { + "name": "pubsuprtcold", + "type": "integer", + "description": "Public support column d", + "mode": "nullable", + }, + { + "name": "pubsuprttot", + "type": "integer", + "description": "Public support total", + "mode": "nullable", + }, + { + "name": "grsinvstinca", + "type": "integer", + "description": "Gross investment income column a", + "mode": "nullable", + }, + { + "name": "grsinvstincb", + "type": "integer", + "description": "Gross investment income column b", + "mode": "nullable", + }, + { + "name": "grsinvstincc", + "type": "integer", + "description": "Gross investment income column c", + "mode": "nullable", + }, + { + "name": "grsinvstincd", + "type": "integer", + "description": "Gross investment income column d", + "mode": "nullable", + }, + { + "name": "grsinvstinctot", + "type": "integer", + "description": "Gross investment income total", + "mode": "nullable", + }, + { + "name": "grntapprvfut", + "type": "integer", + "description": "Grants approved for future payment", + "mode": "nullable", + }, + { + "name": "progsrvcacold", + "type": "integer", + "description": "Program service revenue line 1a (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcacole", + "type": "integer", + "description": "Program service revenue line 1a (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcbcold", + "type": 
"integer", + "description": "Program service revenue line 1b (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcbcole", + "type": "integer", + "description": "Program service revenue line 1b (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcccold", + "type": "integer", + "description": "Program service revenue line 1c (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcccole", + "type": "integer", + "description": "Program service revenue line 1c (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcdcold", + "type": "integer", + "description": "Program service revenue line 1d (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcdcole", + "type": "integer", + "description": "Program service revenue line 1d (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcecold", + "type": "integer", + "description": "Program service revenue line 1e (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcecole", + "type": "integer", + "description": "Program service revenue line 1e (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcfcold", + "type": "integer", + "description": "Program service revenue line 1f (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcfcole", + "type": "integer", + "description": "Program service revenue line 1f (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcgcold", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcgcole", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (exempt)", + "mode": "nullable", + }, + { + "name": "membershpduesd", + "type": "integer", + "description": "Membership dues and assessments (excluded)", + "mode": "nullable", + }, + { + "name": "membershpduese", + "type": "integer", + "description": "Membership dues and assessments (exempt)", + "mode": 
"nullable", + }, + { + "name": "intonsvngsd", + "type": "integer", + "description": "Interest on savings and temporary cash investments (excluded)", + "mode": "nullable", + }, + { + "name": "intonsvngse", + "type": "integer", + "description": "Interest on savings and temporary cash investments (exempt)", + "mode": "nullable", + }, + { + "name": "dvdndsintd", + "type": "integer", + "description": "Dividends and interest from securities (excluded)", + "mode": "nullable", + }, + { + "name": "dvdndsinte", + "type": "integer", + "description": "Dividends and interest from securities (exempt)", + "mode": "nullable", + }, + { + "name": "trnsfrcashcd", + "type": "string", + "description": "Transfer cash to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "trnsothasstscd", + "type": "string", + "description": "Transfer other assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "salesasstscd", + "type": "string", + "description": "Sale of assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "prchsasstscd", + "type": "string", + "description": "Purchase of assets from noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "rentlsfacltscd", + "type": "string", + "description": "Rental of facilities or other assets?", + "mode": "nullable", + }, + { + "name": "reimbrsmntscd", + "type": "string", + "description": "Reimbursements arrangements?", + "mode": "nullable", + }, + { + "name": "loansguarcd", + "type": "string", + "description": "Loans or other guarantees?", + "mode": "nullable", + }, + { + "name": "perfservicescd", + "type": "string", + "description": "Performance of services or membership or fundraising solicitations?", + "mode": "nullable", + }, + { + "name": "sharngasstscd", + "type": "string", + "description": "Sharing of facilities equipment mailing lists other assets or paid employees?", + "mode": "nullable", + }, + ], + ) + + 
irs_990_pf_2014_transform_csv >> load_irs_990_pf_2014_to_bq diff --git a/datasets/irs_990/irs_990_pf_2014/pipeline.yaml b/datasets/irs_990/irs_990_pf_2014/pipeline.yaml new file mode 100644 index 000000000..6c477b085 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2014/pipeline.yaml @@ -0,0 +1,828 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_pf_2014 + + # Description of the table + description: "irs_990_pf_2014 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_pf_2014 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_pf_2014_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_pf_2014" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. 
use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/14eofinextract990pf.zip" + SOURCE_FILE: "files/data.zip" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_pf_2014/data_output.csv" + PIPELINE_NAME: "irs_990_pf_2014" + CSV_HEADERS: >- + ["ein","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","ac
qdrindrintcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"] + RENAME_MAPPINGS: >- + {"EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": 
"totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","ACQDRINDRINTCD": "acqdrindrintcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": 
"brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": 
"grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"} + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "4G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_pf_2014_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_pf_2014/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_pf_2014" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name : "ein" + type : "string" + description : "Employer Identification Number" + mode : "required" + - name : "tax_prd" + type : "string" + description : "Tax period (YYYYMM format)" + mode : "nullable" + - name : "eostatus" + type : "string" + description : "EO Status Code" + mode : "nullable" + - name : "tax_yr" + type : "integer" + description : "SOI Year" + mode : "nullable" + - name : "operatingcd" + type : "string" + description : "Operating foundation code" + mode : "nullable" + - name : "subcd" + type : "string" + description : "Subsection code" + mode : "nullable" + - name : "fairmrktvalamt" + type : "integer" + description : "Total assets – e-o-y fair market value" + mode : "nullable" + - name : "grscontrgifts" + type : "integer" + description : "Contributions received" + mode : "nullable" + - name : "schedbind" + type : "string" + description : "Schedule B indicator" + mode : "nullable" + - name : "intrstrvnue" + type : "integer" + description : "Interest revenue" + mode : "nullable" + - name : "dividndsamt" + type : "integer" + description : "" + mode : "nullable" + - name : "grsrents" + type : "integer" + description : "Gross rents" + mode : "nullable" + - name : 
"grsslspramt" + type : "integer" + description : "Gross sales price for assets" + mode : "nullable" + - name : "costsold" + type : "integer" + description : "Cost-of-goods-sold" + mode : "nullable" + - name : "grsprofitbus" + type : "integer" + description : "Gross profit" + mode : "nullable" + - name : "otherincamt" + type : "integer" + description : "Other income" + mode : "nullable" + - name : "totrcptperbks" + type : "integer" + description : "Total revenue" + mode : "nullable" + - name : "compofficers" + type : "integer" + description : "Compensation of officers" + mode : "nullable" + - name : "pensplemplbenf" + type : "integer" + description : "Pension plans employee benefits" + mode : "nullable" + - name : "legalfeesamt" + type : "integer" + description : "Legal fees" + mode : "nullable" + - name : "accountingfees" + type : "integer" + description : "Accounting fees" + mode : "nullable" + - name : "interestamt" + type : "integer" + description : "Interest" + mode : "nullable" + - name : "depreciationamt" + type : "integer" + description : "Depreciation and depletion" + mode : "nullable" + - name : "occupancyamt" + type : "integer" + description : "Occupancy" + mode : "nullable" + - name : "travlconfmtngs" + type : "integer" + description : "Travel conferences and meetings" + mode : "nullable" + - name : "printingpubl" + type : "integer" + description : "Printing and publications" + mode : "nullable" + - name : "topradmnexpnsa" + type : "integer" + description : "Total operating and administrative expenses column a" + mode : "nullable" + - name : "contrpdpbks" + type : "integer" + description : "Contributions gifts grants paid" + mode : "nullable" + - name : "totexpnspbks" + type : "integer" + description : "Total expenses" + mode : "nullable" + - name : "excessrcpts" + type : "integer" + description : "Net income less deficit" + mode : "nullable" + - name : "totrcptnetinc" + type : "integer" + description : "Total receipts net investment income" + mode : 
"nullable" + - name : "topradmnexpnsb" + type : "integer" + description : "Total operating and administrative expenses column b" + mode : "nullable" + - name : "totexpnsnetinc" + type : "integer" + description : "Total expenses net investment income" + mode : "nullable" + - name : "netinvstinc" + type : "integer" + description : "Net investment income" + mode : "nullable" + - name : "trcptadjnetinc" + type : "integer" + description : "Total receipts adjusted net income" + mode : "nullable" + - name : "totexpnsadjnet" + type : "integer" + description : "Total expenses adjusted net income" + mode : "nullable" + - name : "adjnetinc" + type : "integer" + description : "Adjusted net income" + mode : "nullable" + - name : "topradmnexpnsd" + type : "integer" + description : "Total operating and administrative expenses column d" + mode : "nullable" + - name : "totexpnsexempt" + type : "integer" + description : "Total expenses – exempt purpose" + mode : "nullable" + - name : "othrcashamt" + type : "integer" + description : "Cash non-interest-bearing – e-o-y book value" + mode : "nullable" + - name : "invstgovtoblig" + type : "integer" + description : "Investments in U.S. 
& state government obligations – e-o-y book value" + mode : "nullable" + - name : "invstcorpstk" + type : "integer" + description : "Investments in corporate stock – e-o-y book value" + mode : "nullable" + - name : "invstcorpbnd" + type : "integer" + description : "Investments in corporate bonds– e-o-y book value" + mode : "nullable" + - name : "totinvstsec" + type : "integer" + description : "Total investments in securities – e-o-y book value" + mode : "nullable" + - name : "mrtgloans" + type : "integer" + description : "Investments mortgage loans – e-o-y book value" + mode : "nullable" + - name : "othrinvstend" + type : "integer" + description : "Other investments – e-o-y book value" + mode : "nullable" + - name : "othrassetseoy" + type : "integer" + description : "Other assets – e-o-y book value" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description : "Total assets – e-o-y book value" + mode : "nullable" + - name : "mrtgnotespay" + type : "integer" + description : "Mortgage loans payable – e-o-y book value" + mode : "nullable" + - name : "othrliabltseoy" + type : "integer" + description : "Other liabilities – e-o-y book value" + mode : "nullable" + - name : "totliabend" + type : "integer" + description : "Total liabilities – e-o-y book value" + mode : "nullable" + - name : "tfundnworth" + type : "integer" + description : "Total fund net worth – e-o-y book value" + mode : "nullable" + - name : "fairmrktvaleoy" + type : "integer" + description : "Total assets – e-o-y fair market value" + mode : "nullable" + - name : "totexcapgnls" + type : "integer" + description : "Capital gain net income" + mode : "nullable" + - name : "totexcapgn" + type : "integer" + description : "Net gain – sales of assets" + mode : "nullable" + - name : "totexcapls" + type : "integer" + description : "Net loss – sales of assets" + mode : "nullable" + - name : "invstexcisetx" + type : "integer" + description : "Excise tax on net investment income" + mode : "nullable" 
+ - name : "sec4940notxcd" + type : "string" + description : "Section 4940 – no tax" + mode : "nullable" + - name : "sec4940redtxcd" + type : "string" + description : "Section 4940 – 1 % tax" + mode : "nullable" + - name : "sect511tx" + type : "integer" + description : "Section 511 tax" + mode : "nullable" + - name : "subtitleatx" + type : "integer" + description : "Subtitle A tax" + mode : "nullable" + - name : "totaxpyr" + type : "integer" + description : "Total excise tax" + mode : "nullable" + - name : "esttaxcr" + type : "integer" + description : "Estimated tax credit" + mode : "nullable" + - name : "txwithldsrc" + type : "integer" + description : "Tax withheld at source" + mode : "nullable" + - name : "txpaidf2758" + type : "integer" + description : "Tax paid with Form 2758 (filing extension)" + mode : "nullable" + - name : "erronbkupwthld" + type : "integer" + description : "Erroneous backup withholding credit amount" + mode : "nullable" + - name : "estpnlty" + type : "integer" + description : "Estimated tax penalty" + mode : "nullable" + - name : "taxdue" + type : "integer" + description : "Tax due" + mode : "nullable" + - name : "overpay" + type : "integer" + description : "Overpayment" + mode : "nullable" + - name : "crelamt" + type : "integer" + description : "Credit elect amount" + mode : "nullable" + - name : "infleg" + type : "string" + description : "Influence legislation?" + mode : "nullable" + - name : "actnotpr" + type : "string" + description : "Activities not previously reported?" + mode : "nullable" + - name : "chgnprvrptcd" + type : "string" + description : "Changes not previously reported?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description : "Filed 990-T?" + mode : "nullable" + - name : "contractncd" + type : "string" + description : "Contraction?" + mode : "nullable" + - name : "furnishcpycd" + type : "string" + description : "Furnished copy to Attorney General?" 
+ mode : "nullable" + - name : "claimstatcd" + type : "string" + description : "Claiming status?" + mode : "nullable" + - name : "cntrbtrstxyrcd" + type : "string" + description : "Substantial contributors?" + mode : "nullable" + - name : "acqdrindrintcd" + type : "string" + description : "Distribution to donor advised fund with advisory privileges?" + mode : "nullable" + - name : "orgcmplypubcd" + type : "string" + description : "Comply with public inspection?" + mode : "nullable" + - name : "filedlf1041ind" + type : "string" + description : "Comply with public inspection?" + mode : "nullable" + - name : "propexchcd" + type : "string" + description : "Property exchange?" + mode : "nullable" + - name : "brwlndmnycd" + type : "string" + description : "Borrow lend money?" + mode : "nullable" + - name : "furngoodscd" + type : "string" + description : "Furnished goods?" + mode : "nullable" + - name : "paidcmpncd" + type : "string" + description : "Paid compensation?" + mode : "nullable" + - name : "transfercd" + type : "string" + description : "Transfer?" + mode : "nullable" + - name : "agremkpaycd" + type : "string" + description : "Agree to make pay?" + mode : "nullable" + - name : "exceptactsind" + type : "string" + description : "Acts fail to qualify under section 53.4941(d)-3?" + mode : "nullable" + - name : "prioractvcd" + type : "string" + description : "Engage in acts in prior year?" + mode : "nullable" + - name : "undistrinccd" + type : "string" + description : "Undistributed income?" + mode : "nullable" + - name : "applyprovind" + type : "string" + description : "Not applying section 4942(a)(2) provisions?" + mode : "nullable" + - name : "dirindirintcd" + type : "string" + description : "Direct indirect interest?" + mode : "nullable" + - name : "excesshldcd" + type : "string" + description : "Excess business holdings?" + mode : "nullable" + - name : "invstjexmptcd" + type : "string" + description : "Jeopardizing investments?" 
+ mode : "nullable" + - name : "prevjexmptcd" + type : "string" + description : "Prior year jeopardizing investments?" + mode : "nullable" + - name : "propgndacd" + type : "string" + description : "Propaganda?" + mode : "nullable" + - name : "ipubelectcd" + type : "string" + description : "Influence public election?" + mode : "nullable" + - name : "grntindivcd" + type : "string" + description : "Grant individual?" + mode : "nullable" + - name : "nchrtygrntcd" + type : "string" + description : "Non-charity grant?" + mode : "nullable" + - name : "nreligiouscd" + type : "string" + description : "Non-religious?" + mode : "nullable" + - name : "excptransind" + type : "string" + description : "Transactions fail to qualify under section 53.4945?" + mode : "nullable" + - name : "rfprsnlbnftind" + type : "string" + description : "Receive funds to pay premiums on personal benefit contract?" + mode : "nullable" + - name : "pyprsnlbnftind" + type : "string" + description : "Pay premiums on personal benefit contract?" 
+ mode : "nullable" + - name : "tfairmrktunuse" + type : "integer" + description : "Fair market value of assets not used for charitable purposes" + mode : "nullable" + - name : "valncharitassets" + type : "integer" + description : "Net value of noncharitable-use assets" + mode : "nullable" + - name : "cmpmininvstret" + type : "integer" + description : "Minimum investment return" + mode : "nullable" + - name : "distribamt" + type : "integer" + description : "Distributable amount" + mode : "nullable" + - name : "undistribincyr" + type : "integer" + description : "Undistributed income" + mode : "nullable" + - name : "adjnetinccola" + type : "integer" + description : "Adjusted net income column a" + mode : "nullable" + - name : "adjnetinccolb" + type : "integer" + description : "Adjusted net income column b" + mode : "nullable" + - name : "adjnetinccolc" + type : "integer" + description : "Adjusted net income column c" + mode : "nullable" + - name : "adjnetinccold" + type : "integer" + description : "Adjusted net income column d" + mode : "nullable" + - name : "adjnetinctot" + type : "integer" + description : "Adjusted net income total" + mode : "nullable" + - name : "qlfydistriba" + type : "integer" + description : "Qualifying distributions column a" + mode : "nullable" + - name : "qlfydistribb" + type : "integer" + description : "Qualifying distributions column b" + mode : "nullable" + - name : "qlfydistribc" + type : "integer" + description : "Qualifying distributions column c" + mode : "nullable" + - name : "qlfydistribd" + type : "integer" + description : "Qualifying distributions column d" + mode : "nullable" + - name : "qlfydistribtot" + type : "integer" + description : "Qualifying distributions total" + mode : "nullable" + - name : "valassetscola" + type : "integer" + description : "Value assets column a" + mode : "nullable" + - name : "valassetscolb" + type : "integer" + description : "Value assets column b" + mode : "nullable" + - name : "valassetscolc" + 
type : "integer" + description : "Value assets column c" + mode : "nullable" + - name : "valassetscold" + type : "integer" + description : "Value assets column d" + mode : "nullable" + - name : "valassetstot" + type : "integer" + description : "Value assets total" + mode : "nullable" + - name : "qlfyasseta" + type : "integer" + description : "Qualifying assets column a" + mode : "nullable" + - name : "qlfyassetb" + type : "integer" + description : "Qualifying assets column b" + mode : "nullable" + - name : "qlfyassetc" + type : "integer" + description : "Qualifying assets column c" + mode : "nullable" + - name : "qlfyassetd" + type : "integer" + description : "Qualifying assets column d" + mode : "nullable" + - name : "qlfyassettot" + type : "integer" + description : "Qualifying assets total" + mode : "nullable" + - name : "endwmntscola" + type : "integer" + description : "Endowments column a" + mode : "nullable" + - name : "endwmntscolb" + type : "integer" + description : "Endowments column b" + mode : "nullable" + - name : "endwmntscolc" + type : "integer" + description : "Endowments column c" + mode : "nullable" + - name : "endwmntscold" + type : "integer" + description : "Endowments column d" + mode : "nullable" + - name : "endwmntstot" + type : "integer" + description : "Endowments total" + mode : "nullable" + - name : "totsuprtcola" + type : "integer" + description : "Total support column a" + mode : "nullable" + - name : "totsuprtcolb" + type : "integer" + description : "Total support column b" + mode : "nullable" + - name : "totsuprtcolc" + type : "integer" + description : "Total support column c" + mode : "nullable" + - name : "totsuprtcold" + type : "integer" + description : "Total support column d" + mode : "nullable" + - name : "totsuprttot" + type : "integer" + description : "Total support total" + mode : "nullable" + - name : "pubsuprtcola" + type : "integer" + description : "Public support column a" + mode : "nullable" + - name : "pubsuprtcolb" + 
type : "integer" + description : "Public support column b" + mode : "nullable" + - name : "pubsuprtcolc" + type : "integer" + description : "Public support column c" + mode : "nullable" + - name : "pubsuprtcold" + type : "integer" + description : "Public support column d" + mode : "nullable" + - name : "pubsuprttot" + type : "integer" + description : "Public support total" + mode : "nullable" + - name : "grsinvstinca" + type : "integer" + description : "Gross investment income column a" + mode : "nullable" + - name : "grsinvstincb" + type : "integer" + description : "Gross investment income column b" + mode : "nullable" + - name : "grsinvstincc" + type : "integer" + description : "Gross investment income column c" + mode : "nullable" + - name : "grsinvstincd" + type : "integer" + description : "Gross investment income column d" + mode : "nullable" + - name : "grsinvstinctot" + type : "integer" + description : "Gross investment income total" + mode : "nullable" + - name : "grntapprvfut" + type : "integer" + description : "Grants approved for future payment" + mode : "nullable" + - name : "progsrvcacold" + type : "integer" + description : "Program service revenue line 1a (excluded)" + mode : "nullable" + - name : "progsrvcacole" + type : "integer" + description : "Program service revenue line 1a (exempt)" + mode : "nullable" + - name : "progsrvcbcold" + type : "integer" + description : "Program service revenue line 1b (excluded)" + mode : "nullable" + - name : "progsrvcbcole" + type : "integer" + description : "Program service revenue line 1b (exempt)" + mode : "nullable" + - name : "progsrvcccold" + type : "integer" + description : "Program service revenue line 1c (excluded)" + mode : "nullable" + - name : "progsrvcccole" + type : "integer" + description : "Program service revenue line 1c (exempt)" + mode : "nullable" + - name : "progsrvcdcold" + type : "integer" + description : "Program service revenue line 1d (excluded)" + mode : "nullable" + - name : 
"progsrvcdcole" + type : "integer" + description : "Program service revenue line 1d (exempt)" + mode : "nullable" + - name : "progsrvcecold" + type : "integer" + description : "Program service revenue line 1e (excluded)" + mode : "nullable" + - name : "progsrvcecole" + type : "integer" + description : "Program service revenue line 1e (exempt)" + mode : "nullable" + - name : "progsrvcfcold" + type : "integer" + description : "Program service revenue line 1f (excluded)" + mode : "nullable" + - name : "progsrvcfcole" + type : "integer" + description : "Program service revenue line 1f (exempt)" + mode : "nullable" + - name : "progsrvcgcold" + type : "integer" + description : "Program service revenue--fees and contracts from government line 1g (excluded)" + mode : "nullable" + - name : "progsrvcgcole" + type : "integer" + description : "Program service revenue--fees and contracts from government line 1g (exempt)" + mode : "nullable" + - name : "membershpduesd" + type : "integer" + description : "Membership dues and assessments (excluded)" + mode : "nullable" + - name : "membershpduese" + type : "integer" + description : "Membership dues and assessments (exempt)" + mode : "nullable" + - name : "intonsvngsd" + type : "integer" + description : "Interest on savings and temporary cash investments (excluded)" + mode : "nullable" + - name : "intonsvngse" + type : "integer" + description : "Interest on savings and temporary cash investments (exempt)" + mode : "nullable" + - name : "dvdndsintd" + type : "integer" + description : "Dividends and interest from securities (excluded)" + mode : "nullable" + - name : "dvdndsinte" + type : "integer" + description : "Dividends and interest from securities (exempt)" + mode : "nullable" + - name : "trnsfrcashcd" + type : "string" + description : "Transfer cash to noncharitable exempt organization?" + mode : "nullable" + - name : "trnsothasstscd" + type : "string" + description : "Transfer other assets to noncharitable exempt organization?" 
+ mode : "nullable" + - name : "salesasstscd" + type : "string" + description : "Sale of assets to noncharitable exempt organization?" + mode : "nullable" + - name : "prchsasstscd" + type : "string" + description : "Purchase of assets from noncharitable exempt organization?" + mode : "nullable" + - name : "rentlsfacltscd" + type : "string" + description : "Rental of facilities or other assets?" + mode : "nullable" + - name : "reimbrsmntscd" + type : "string" + description : "Reimbursements arrangements?" + mode : "nullable" + - name : "loansguarcd" + type : "string" + description : "Loans or other guarantees?" + mode : "nullable" + - name : "perfservicescd" + type : "string" + description : "Performance of services or membership or fundraising solicitations?" + mode : "nullable" + - name : "sharngasstscd" + type : "string" + description : "Sharing of facilities equipment mailing lists other assets or paid employees?" + mode : "nullable" + + + graph_paths: + - "irs_990_pf_2014_transform_csv >> load_irs_990_pf_2014_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py b/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py new file mode 100644 index 000000000..b535058a8 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py @@ -0,0 +1,1143 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_pf_2015", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_pf_2015_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_pf_2015_transform_csv", + startup_timeout_seconds=600, + name="irs_990_pf_2015", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextract990pf.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_pf_2015/data_output.csv", + "PIPELINE_NAME": "irs_990_pf_2015", + "CSV_HEADERS": 
'["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","t
otsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"]', + "RENAME_MAPPINGS": '{"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": 
"totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": 
"adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": 
"salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_pf_2015_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_pf_2015_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_pf_2015/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_pf_2015", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_prd", + "type": "string", + "description": "Tax period (YYYYMM format)", + "mode": "nullable", + }, + { + "name": "eostatus", + "type": "string", + "description": "EO Status Code", + "mode": "nullable", + }, + { + "name": "tax_yr", + "type": "integer", + "description": "SOI Year", + "mode": "nullable", + }, + { + "name": "operatingcd", + "type": "string", + "description": "Operating foundation code", + "mode": "nullable", + }, + { + "name": "subcd", + "type": "string", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "fairmrktvalamt", + "type": "integer", + "description": "Total assets – e-o-y fair market valu", + "mode": "nullable", + }, + { + "name": "grscontrgifts", + "type": "integer", + "description": "Contributions received", + "mode": "nullable", + }, + { + "name": "schedbind", + "type": "string", + "description": "Schedule B indicator", + "mode": "nullable", + }, + { + "name": "intrstrvnue", + "type": "integer", + "description": "Interest revenue", + "mode": "nullable", + }, 
+ { + "name": "dividndsamt", + "type": "integer", + "description": "", + "mode": "nullable", + }, + { + "name": "grsrents", + "type": "integer", + "description": "Gross rents", + "mode": "nullable", + }, + { + "name": "grsslspramt", + "type": "integer", + "description": "Gross sales price for assets", + "mode": "nullable", + }, + { + "name": "costsold", + "type": "integer", + "description": "Cost-of-goods-sold", + "mode": "nullable", + }, + { + "name": "grsprofitbus", + "type": "integer", + "description": "Gross profit", + "mode": "nullable", + }, + { + "name": "otherincamt", + "type": "integer", + "description": "Other income", + "mode": "nullable", + }, + { + "name": "totrcptperbks", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "compofficers", + "type": "integer", + "description": "Compensation of officers", + "mode": "nullable", + }, + { + "name": "pensplemplbenf", + "type": "integer", + "description": "Pension plans employee benefits", + "mode": "nullable", + }, + { + "name": "legalfeesamt", + "type": "integer", + "description": "Legal fees", + "mode": "nullable", + }, + { + "name": "accountingfees", + "type": "integer", + "description": "Accounting fees", + "mode": "nullable", + }, + { + "name": "interestamt", + "type": "integer", + "description": "Interest", + "mode": "nullable", + }, + { + "name": "depreciationamt", + "type": "integer", + "description": "Depreciation and depletion", + "mode": "nullable", + }, + { + "name": "occupancyamt", + "type": "integer", + "description": "Occupancy", + "mode": "nullable", + }, + { + "name": "travlconfmtngs", + "type": "integer", + "description": "Travel conferences and meetings", + "mode": "nullable", + }, + { + "name": "printingpubl", + "type": "integer", + "description": "Printing and publications", + "mode": "nullable", + }, + { + "name": "topradmnexpnsa", + "type": "integer", + "description": "Total operating and administrative expenses column a", + "mode": 
"nullable", + }, + { + "name": "contrpdpbks", + "type": "integer", + "description": "Contributions gifts grants paid", + "mode": "nullable", + }, + { + "name": "totexpnspbks", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "excessrcpts", + "type": "integer", + "description": "Net income less deficit", + "mode": "nullable", + }, + { + "name": "totrcptnetinc", + "type": "integer", + "description": "Total receipts net investment income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsb", + "type": "integer", + "description": "Total operating and administrative expenses column b", + "mode": "nullable", + }, + { + "name": "totexpnsnetinc", + "type": "integer", + "description": "Total expenses net investment income", + "mode": "nullable", + }, + { + "name": "netinvstinc", + "type": "integer", + "description": "Net investment income", + "mode": "nullable", + }, + { + "name": "trcptadjnetinc", + "type": "integer", + "description": "Total receipts adjusted net income", + "mode": "nullable", + }, + { + "name": "totexpnsadjnet", + "type": "integer", + "description": "Total expenses adjusted net income", + "mode": "nullable", + }, + { + "name": "adjnetinc", + "type": "integer", + "description": "Adjusted net income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsd", + "type": "integer", + "description": "Total operating and administrative expenses column d", + "mode": "nullable", + }, + { + "name": "totexpnsexempt", + "type": "integer", + "description": "Total expenses – exempt purpose", + "mode": "nullable", + }, + { + "name": "othrcashamt", + "type": "integer", + "description": "Cash non-interest-bearing – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstgovtoblig", + "type": "integer", + "description": "Investments in U.S. 
& state government obligations – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpstk", + "type": "integer", + "description": "Investments in corporate stock – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpbnd", + "type": "integer", + "description": "Investments in corporate bonds– e-o-y book value", + "mode": "nullable", + }, + { + "name": "totinvstsec", + "type": "integer", + "description": "Total investments in securities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgloans", + "type": "integer", + "description": "Investments mortgage loans – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrinvstend", + "type": "integer", + "description": "Other investments – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrassetseoy", + "type": "integer", + "description": "Other assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgnotespay", + "type": "integer", + "description": "Mortgage loans payable – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrliabltseoy", + "type": "integer", + "description": "Other liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "tfundnworth", + "type": "integer", + "description": "Total fund net worth – e-o-y book value", + "mode": "nullable", + }, + { + "name": "fairmrktvaleoy", + "type": "integer", + "description": "Total assets – e-o-y fair market value", + "mode": "nullable", + }, + { + "name": "totexcapgnls", + "type": "integer", + "description": "Capital gain net income", + "mode": "nullable", + }, + { + "name": "totexcapgn", + "type": "integer", + "description": "Net gain – sales of assets", + "mode": "nullable", + }, + { + 
"name": "totexcapls", + "type": "integer", + "description": "Net loss – sales of assets", + "mode": "nullable", + }, + { + "name": "invstexcisetx", + "type": "integer", + "description": "Excise tax on net investment income", + "mode": "nullable", + }, + { + "name": "sec4940notxcd", + "type": "string", + "description": "Section 4940 – no tax", + "mode": "nullable", + }, + { + "name": "sec4940redtxcd", + "type": "string", + "description": "Section 4940 – 1 % tax", + "mode": "nullable", + }, + { + "name": "sect511tx", + "type": "integer", + "description": "Section 511 tax", + "mode": "nullable", + }, + { + "name": "subtitleatx", + "type": "integer", + "description": "Subtitle A tax", + "mode": "nullable", + }, + { + "name": "totaxpyr", + "type": "integer", + "description": "Total excise tax", + "mode": "nullable", + }, + { + "name": "esttaxcr", + "type": "integer", + "description": "Estimated tax credit", + "mode": "nullable", + }, + { + "name": "txwithldsrc", + "type": "integer", + "description": "Tax withheld at source", + "mode": "nullable", + }, + { + "name": "txpaidf2758", + "type": "integer", + "description": "Tax paid with Form 2758 (filing extension)", + "mode": "nullable", + }, + { + "name": "erronbkupwthld", + "type": "integer", + "description": "Erroneous backup withholding credit amount", + "mode": "nullable", + }, + { + "name": "estpnlty", + "type": "integer", + "description": "Estimated tax penalty", + "mode": "nullable", + }, + { + "name": "taxdue", + "type": "integer", + "description": "Tax due", + "mode": "nullable", + }, + { + "name": "overpay", + "type": "integer", + "description": "Overpayment", + "mode": "nullable", + }, + { + "name": "crelamt", + "type": "integer", + "description": "Credit elect amount", + "mode": "nullable", + }, + { + "name": "infleg", + "type": "string", + "description": "Influence legislation?", + "mode": "nullable", + }, + { + "name": "actnotpr", + "type": "string", + "description": "Activities not previously reported?", + 
"mode": "nullable", + }, + { + "name": "chgnprvrptcd", + "type": "string", + "description": "Changes not previously reported?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Filed 990-T?", + "mode": "nullable", + }, + { + "name": "contractncd", + "type": "string", + "description": "Contraction?", + "mode": "nullable", + }, + { + "name": "furnishcpycd", + "type": "string", + "description": "Furnished copy to Attorney General?", + "mode": "nullable", + }, + { + "name": "claimstatcd", + "type": "string", + "description": "Claiming status?", + "mode": "nullable", + }, + { + "name": "cntrbtrstxyrcd", + "type": "string", + "description": "Substantial contributors?", + "mode": "nullable", + }, + { + "name": "distribdafcd", + "type": "string", + "description": "Distribution to donor advised fund with advisory privileges?", + "mode": "nullable", + }, + { + "name": "orgcmplypubcd", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "filedlf1041ind", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "propexchcd", + "type": "string", + "description": "Property exchange?", + "mode": "nullable", + }, + { + "name": "brwlndmnycd", + "type": "string", + "description": "Borrow lend money?", + "mode": "nullable", + }, + { + "name": "furngoodscd", + "type": "string", + "description": "Furnished goods?", + "mode": "nullable", + }, + { + "name": "paidcmpncd", + "type": "string", + "description": "Paid compensation?", + "mode": "nullable", + }, + { + "name": "transfercd", + "type": "string", + "description": "Transfer?", + "mode": "nullable", + }, + { + "name": "agremkpaycd", + "type": "string", + "description": "Agree to make pay?", + "mode": "nullable", + }, + { + "name": "exceptactsind", + "type": "string", + "description": "Acts fail to qualify under section 53.4941(d)-3?", + "mode": "nullable", + }, + { + "name": 
"prioractvcd", + "type": "string", + "description": "Engage in acts in prior year?", + "mode": "nullable", + }, + { + "name": "undistrinccd", + "type": "string", + "description": "Undistributed income?", + "mode": "nullable", + }, + { + "name": "applyprovind", + "type": "string", + "description": "Not applying section 4942(a)(2) provisions?", + "mode": "nullable", + }, + { + "name": "dirindirintcd", + "type": "string", + "description": "Direct indirect interest?", + "mode": "nullable", + }, + { + "name": "excesshldcd", + "type": "string", + "description": "Excess business holdings?", + "mode": "nullable", + }, + { + "name": "invstjexmptcd", + "type": "string", + "description": "Jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "prevjexmptcd", + "type": "string", + "description": "Prior year jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "propgndacd", + "type": "string", + "description": "Propaganda?", + "mode": "nullable", + }, + { + "name": "ipubelectcd", + "type": "string", + "description": "Influence public election?", + "mode": "nullable", + }, + { + "name": "grntindivcd", + "type": "string", + "description": "Grant individual?", + "mode": "nullable", + }, + { + "name": "nchrtygrntcd", + "type": "string", + "description": "Non-charity grant?", + "mode": "nullable", + }, + { + "name": "nreligiouscd", + "type": "string", + "description": "Non-religious?", + "mode": "nullable", + }, + { + "name": "excptransind", + "type": "string", + "description": "Transactions fail to qualify under section 53.4945?", + "mode": "nullable", + }, + { + "name": "rfprsnlbnftind", + "type": "string", + "description": "Receive funds to pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "pyprsnlbnftind", + "type": "string", + "description": "Pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "tfairmrktunuse", + "type": "integer", + "description": "Fair market value of assets 
not used for charitable purposes", + "mode": "nullable", + }, + { + "name": "valncharitassets", + "type": "integer", + "description": "Net value of noncharitable-use assets", + "mode": "nullable", + }, + { + "name": "cmpmininvstret", + "type": "integer", + "description": "Minimum investment return", + "mode": "nullable", + }, + { + "name": "distribamt", + "type": "integer", + "description": "Distributable amount", + "mode": "nullable", + }, + { + "name": "undistribincyr", + "type": "integer", + "description": "Undistributed income", + "mode": "nullable", + }, + { + "name": "adjnetinccola", + "type": "integer", + "description": "Adjusted net income column a", + "mode": "nullable", + }, + { + "name": "adjnetinccolb", + "type": "integer", + "description": "Adjusted net income column b", + "mode": "nullable", + }, + { + "name": "adjnetinccolc", + "type": "integer", + "description": "Adjusted net income column c", + "mode": "nullable", + }, + { + "name": "adjnetinccold", + "type": "integer", + "description": "Adjusted net income column d", + "mode": "nullable", + }, + { + "name": "adjnetinctot", + "type": "integer", + "description": "Adjusted net income total", + "mode": "nullable", + }, + { + "name": "qlfydistriba", + "type": "integer", + "description": "Qualifying distributions column a", + "mode": "nullable", + }, + { + "name": "qlfydistribb", + "type": "integer", + "description": "Qualifying distributions column b", + "mode": "nullable", + }, + { + "name": "qlfydistribc", + "type": "integer", + "description": "Qualifying distributions column c", + "mode": "nullable", + }, + { + "name": "qlfydistribd", + "type": "integer", + "description": "Qualifying distributions column d", + "mode": "nullable", + }, + { + "name": "qlfydistribtot", + "type": "integer", + "description": "Qualifying distributions total", + "mode": "nullable", + }, + { + "name": "valassetscola", + "type": "integer", + "description": "Value assets column a", + "mode": "nullable", + }, + { + "name": 
"valassetscolb", + "type": "integer", + "description": "Value assets column b", + "mode": "nullable", + }, + { + "name": "valassetscolc", + "type": "integer", + "description": "Value assets column c", + "mode": "nullable", + }, + { + "name": "valassetscold", + "type": "integer", + "description": "Value assets column d", + "mode": "nullable", + }, + { + "name": "valassetstot", + "type": "integer", + "description": "Value assets total", + "mode": "nullable", + }, + { + "name": "qlfyasseta", + "type": "integer", + "description": "Qualifying assets column a", + "mode": "nullable", + }, + { + "name": "qlfyassetb", + "type": "integer", + "description": "Qualifying assets column b", + "mode": "nullable", + }, + { + "name": "qlfyassetc", + "type": "integer", + "description": "Qualifying assets column c", + "mode": "nullable", + }, + { + "name": "qlfyassetd", + "type": "integer", + "description": "Qualifying assets column d", + "mode": "nullable", + }, + { + "name": "qlfyassettot", + "type": "integer", + "description": "Qualifying assets total", + "mode": "nullable", + }, + { + "name": "endwmntscola", + "type": "integer", + "description": "Endowments column a", + "mode": "nullable", + }, + { + "name": "endwmntscolb", + "type": "integer", + "description": "Endowments column b", + "mode": "nullable", + }, + { + "name": "endwmntscolc", + "type": "integer", + "description": "Endowments column c", + "mode": "nullable", + }, + { + "name": "endwmntscold", + "type": "integer", + "description": "Endowments column d", + "mode": "nullable", + }, + { + "name": "endwmntstot", + "type": "integer", + "description": "Endowments total", + "mode": "nullable", + }, + { + "name": "totsuprtcola", + "type": "integer", + "description": "Total support column a", + "mode": "nullable", + }, + { + "name": "totsuprtcolb", + "type": "integer", + "description": "Total support column b", + "mode": "nullable", + }, + { + "name": "totsuprtcolc", + "type": "integer", + "description": "Total support column 
c", + "mode": "nullable", + }, + { + "name": "totsuprtcold", + "type": "integer", + "description": "Total support column d", + "mode": "nullable", + }, + { + "name": "totsuprttot", + "type": "integer", + "description": "Total support total", + "mode": "nullable", + }, + { + "name": "pubsuprtcola", + "type": "integer", + "description": "Public support column a", + "mode": "nullable", + }, + { + "name": "pubsuprtcolb", + "type": "integer", + "description": "Public support column b", + "mode": "nullable", + }, + { + "name": "pubsuprtcolc", + "type": "integer", + "description": "Public support column c", + "mode": "nullable", + }, + { + "name": "pubsuprtcold", + "type": "integer", + "description": "Public support column d", + "mode": "nullable", + }, + { + "name": "pubsuprttot", + "type": "integer", + "description": "Public support total", + "mode": "nullable", + }, + { + "name": "grsinvstinca", + "type": "integer", + "description": "Gross investment income column a", + "mode": "nullable", + }, + { + "name": "grsinvstincb", + "type": "integer", + "description": "Gross investment income column b", + "mode": "nullable", + }, + { + "name": "grsinvstincc", + "type": "integer", + "description": "Gross investment income column c", + "mode": "nullable", + }, + { + "name": "grsinvstincd", + "type": "integer", + "description": "Gross investment income column d", + "mode": "nullable", + }, + { + "name": "grsinvstinctot", + "type": "integer", + "description": "Gross investment income total", + "mode": "nullable", + }, + { + "name": "grntapprvfut", + "type": "integer", + "description": "Grants approved for future payment", + "mode": "nullable", + }, + { + "name": "progsrvcacold", + "type": "integer", + "description": "Program service revenue line 1a (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcacole", + "type": "integer", + "description": "Program service revenue line 1a (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcbcold", + "type": "integer", + 
"description": "Program service revenue line 1b (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcbcole", + "type": "integer", + "description": "Program service revenue line 1b (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcccold", + "type": "integer", + "description": "Program service revenue line 1c (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcccole", + "type": "integer", + "description": "Program service revenue line 1c (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcdcold", + "type": "integer", + "description": "Program service revenue line 1d (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcdcole", + "type": "integer", + "description": "Program service revenue line 1d (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcecold", + "type": "integer", + "description": "Program service revenue line 1e (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcecole", + "type": "integer", + "description": "Program service revenue line 1e (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcfcold", + "type": "integer", + "description": "Program service revenue line 1f (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcfcole", + "type": "integer", + "description": "Program service revenue line 1f (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcgcold", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcgcole", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (exempt)", + "mode": "nullable", + }, + { + "name": "membershpduesd", + "type": "integer", + "description": "Membership dues and assessments (excluded)", + "mode": "nullable", + }, + { + "name": "membershpduese", + "type": "integer", + "description": "Membership dues and assessments (exempt)", + "mode": "nullable", + }, + { + 
"name": "intonsvngsd", + "type": "integer", + "description": "Interest on savings and temporary cash investments (excluded)", + "mode": "nullable", + }, + { + "name": "intonsvngse", + "type": "integer", + "description": "Interest on savings and temporary cash investments (exempt)", + "mode": "nullable", + }, + { + "name": "dvdndsintd", + "type": "integer", + "description": "Dividends and interest from securities (excluded)", + "mode": "nullable", + }, + { + "name": "dvdndsinte", + "type": "integer", + "description": "Dividends and interest from securities (exempt)", + "mode": "nullable", + }, + { + "name": "trnsfrcashcd", + "type": "string", + "description": "Transfer cash to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "trnsothasstscd", + "type": "string", + "description": "Transfer other assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "salesasstscd", + "type": "string", + "description": "Sale of assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "prchsasstscd", + "type": "string", + "description": "Purchase of assets from noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "rentlsfacltscd", + "type": "string", + "description": "Rental of facilities or other assets?", + "mode": "nullable", + }, + { + "name": "reimbrsmntscd", + "type": "string", + "description": "Reimbursements arrangements?", + "mode": "nullable", + }, + { + "name": "loansguarcd", + "type": "string", + "description": "Loans or other guarantees?", + "mode": "nullable", + }, + { + "name": "perfservicescd", + "type": "string", + "description": "Performance of services or membership or fundraising solicitations?", + "mode": "nullable", + }, + { + "name": "sharngasstscd", + "type": "string", + "description": "Sharing of facilities equipment mailing lists other assets or paid employees?", + "mode": "nullable", + }, + ], + ) + + irs_990_pf_2015_transform_csv >> 
load_irs_990_pf_2015_to_bq diff --git a/datasets/irs_990/irs_990_pf_2015/pipeline.yaml b/datasets/irs_990/irs_990_pf_2015/pipeline.yaml new file mode 100644 index 000000000..587af6929 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2015/pipeline.yaml @@ -0,0 +1,832 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_pf_2015 + + # Description of the table + description: "irs_990_pf_2015 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_pf_2015 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_pf_2015_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_pf_2015" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. 
use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/15eofinextract990pf.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_pf_2015/data_output.csv" + PIPELINE_NAME: "irs_990_pf_2015" + CSV_HEADERS: >- + ["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrc
d","distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"] + RENAME_MAPPINGS: >- + {"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": 
"otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": 
"orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": 
"grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"} + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_pf_2015_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_pf_2015/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_pf_2015" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name : "ein" + type : "string" + description : "Employer Identification Number" + mode : "required" + - name : "elf" + type : "string" + description : "E-file indicator" + mode : "nullable" + - name : "tax_prd" + type : "string" + description : "Tax period (YYYYMM format)" + mode : "nullable" + - name : "eostatus" + type : "string" + description : "EO Status Code" + mode : "nullable" + - name : "tax_yr" + type : "integer" + description : "SOI Year" + mode : "nullable" + - name : "operatingcd" + type : "string" + description : "Operating foundation code" + mode : "nullable" + - name : "subcd" + type : "string" + description : "Subsection code" + mode : "nullable" + - name : "fairmrktvalamt" + type : "integer" + description : "Total assets – e-o-y fair market value" + mode : "nullable" + - name : "grscontrgifts" + type : "integer" + description : "Contributions received" + mode : "nullable" + - name : "schedbind" + type : "string" + description : "Schedule B indicator" + mode : "nullable" + - name : "intrstrvnue" + type : "integer" + description : "Interest revenue" + mode : "nullable" + - name : "dividndsamt" + type : "integer" + description : "" + mode : "nullable" + - name :
"grsrents" + type : "integer" + description : "Gross rents" + mode : "nullable" + - name : "grsslspramt" + type : "integer" + description : "Gross sales price for assets" + mode : "nullable" + - name : "costsold" + type : "integer" + description : "Cost-of-goods-sold" + mode : "nullable" + - name : "grsprofitbus" + type : "integer" + description : "Gross profit" + mode : "nullable" + - name : "otherincamt" + type : "integer" + description : "Other income" + mode : "nullable" + - name : "totrcptperbks" + type : "integer" + description : "Total revenue" + mode : "nullable" + - name : "compofficers" + type : "integer" + description : "Compensation of officers" + mode : "nullable" + - name : "pensplemplbenf" + type : "integer" + description : "Pension plans employee benefits" + mode : "nullable" + - name : "legalfeesamt" + type : "integer" + description : "Legal fees" + mode : "nullable" + - name : "accountingfees" + type : "integer" + description : "Accounting fees" + mode : "nullable" + - name : "interestamt" + type : "integer" + description : "Interest" + mode : "nullable" + - name : "depreciationamt" + type : "integer" + description : "Depreciation and depletion" + mode : "nullable" + - name : "occupancyamt" + type : "integer" + description : "Occupancy" + mode : "nullable" + - name : "travlconfmtngs" + type : "integer" + description : "Travel conferences and meetings" + mode : "nullable" + - name : "printingpubl" + type : "integer" + description : "Printing and publications" + mode : "nullable" + - name : "topradmnexpnsa" + type : "integer" + description : "Total operating and administrative expenses column a" + mode : "nullable" + - name : "contrpdpbks" + type : "integer" + description : "Contributions gifts grants paid" + mode : "nullable" + - name : "totexpnspbks" + type : "integer" + description : "Total expenses" + mode : "nullable" + - name : "excessrcpts" + type : "integer" + description : "Net income less deficit" + mode : "nullable" + - name : 
"totrcptnetinc" + type : "integer" + description : "Total receipts net investment income" + mode : "nullable" + - name : "topradmnexpnsb" + type : "integer" + description : "Total operating and administrative expenses column b" + mode : "nullable" + - name : "totexpnsnetinc" + type : "integer" + description : "Total expenses net investment income" + mode : "nullable" + - name : "netinvstinc" + type : "integer" + description : "Net investment income" + mode : "nullable" + - name : "trcptadjnetinc" + type : "integer" + description : "Total receipts adjusted net income" + mode : "nullable" + - name : "totexpnsadjnet" + type : "integer" + description : "Total expenses adjusted net income" + mode : "nullable" + - name : "adjnetinc" + type : "integer" + description : "Adjusted net income" + mode : "nullable" + - name : "topradmnexpnsd" + type : "integer" + description : "Total operating and administrative expenses column d" + mode : "nullable" + - name : "totexpnsexempt" + type : "integer" + description : "Total expenses – exempt purpose" + mode : "nullable" + - name : "othrcashamt" + type : "integer" + description : "Cash non-interest-bearing – e-o-y book value" + mode : "nullable" + - name : "invstgovtoblig" + type : "integer" + description : "Investments in U.S. 
& state government obligations – e-o-y book value" + mode : "nullable" + - name : "invstcorpstk" + type : "integer" + description : "Investments in corporate stock – e-o-y book value" + mode : "nullable" + - name : "invstcorpbnd" + type : "integer" + description : "Investments in corporate bonds – e-o-y book value" + mode : "nullable" + - name : "totinvstsec" + type : "integer" + description : "Total investments in securities – e-o-y book value" + mode : "nullable" + - name : "mrtgloans" + type : "integer" + description : "Investments mortgage loans – e-o-y book value" + mode : "nullable" + - name : "othrinvstend" + type : "integer" + description : "Other investments – e-o-y book value" + mode : "nullable" + - name : "othrassetseoy" + type : "integer" + description : "Other assets – e-o-y book value" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description : "Total assets – e-o-y book value" + mode : "nullable" + - name : "mrtgnotespay" + type : "integer" + description : "Mortgage loans payable – e-o-y book value" + mode : "nullable" + - name : "othrliabltseoy" + type : "integer" + description : "Other liabilities – e-o-y book value" + mode : "nullable" + - name : "totliabend" + type : "integer" + description : "Total liabilities – e-o-y book value" + mode : "nullable" + - name : "tfundnworth" + type : "integer" + description : "Total fund net worth – e-o-y book value" + mode : "nullable" + - name : "fairmrktvaleoy" + type : "integer" + description : "Total assets – e-o-y fair market value" + mode : "nullable" + - name : "totexcapgnls" + type : "integer" + description : "Capital gain net income" + mode : "nullable" + - name : "totexcapgn" + type : "integer" + description : "Net gain – sales of assets" + mode : "nullable" + - name : "totexcapls" + type : "integer" + description : "Net loss – sales of assets" + mode : "nullable" + - name : "invstexcisetx" + type : "integer" + description : "Excise tax on net investment income" + mode : "nullable" 
+ - name : "sec4940notxcd" + type : "string" + description : "Section 4940 – no tax" + mode : "nullable" + - name : "sec4940redtxcd" + type : "string" + description : "Section 4940 – 1 % tax" + mode : "nullable" + - name : "sect511tx" + type : "integer" + description : "Section 511 tax" + mode : "nullable" + - name : "subtitleatx" + type : "integer" + description : "Subtitle A tax" + mode : "nullable" + - name : "totaxpyr" + type : "integer" + description : "Total excise tax" + mode : "nullable" + - name : "esttaxcr" + type : "integer" + description : "Estimated tax credit" + mode : "nullable" + - name : "txwithldsrc" + type : "integer" + description : "Tax withheld at source" + mode : "nullable" + - name : "txpaidf2758" + type : "integer" + description : "Tax paid with Form 2758 (filing extension)" + mode : "nullable" + - name : "erronbkupwthld" + type : "integer" + description : "Erroneous backup withholding credit amount" + mode : "nullable" + - name : "estpnlty" + type : "integer" + description : "Estimated tax penalty" + mode : "nullable" + - name : "taxdue" + type : "integer" + description : "Tax due" + mode : "nullable" + - name : "overpay" + type : "integer" + description : "Overpayment" + mode : "nullable" + - name : "crelamt" + type : "integer" + description : "Credit elect amount" + mode : "nullable" + - name : "infleg" + type : "string" + description : "Influence legislation?" + mode : "nullable" + - name : "actnotpr" + type : "string" + description : "Activities not previously reported?" + mode : "nullable" + - name : "chgnprvrptcd" + type : "string" + description : "Changes not previously reported?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description : "Filed 990-T?" + mode : "nullable" + - name : "contractncd" + type : "string" + description : "Contraction?" + mode : "nullable" + - name : "furnishcpycd" + type : "string" + description : "Furnished copy to Attorney General?" 
+ mode : "nullable" + - name : "claimstatcd" + type : "string" + description : "Claiming status?" + mode : "nullable" + - name : "cntrbtrstxyrcd" + type : "string" + description : "Substantial contributors?" + mode : "nullable" + - name : "distribdafcd" + type : "string" + description : "Distribution to donor advised fund with advisory privileges?" + mode : "nullable" + - name : "orgcmplypubcd" + type : "string" + description : "Comply with public inspection?" + mode : "nullable" + - name : "filedlf1041ind" + type : "string" + description : "Filed in lieu of Form 1041?" + mode : "nullable" + - name : "propexchcd" + type : "string" + description : "Property exchange?" + mode : "nullable" + - name : "brwlndmnycd" + type : "string" + description : "Borrow lend money?" + mode : "nullable" + - name : "furngoodscd" + type : "string" + description : "Furnished goods?" + mode : "nullable" + - name : "paidcmpncd" + type : "string" + description : "Paid compensation?" + mode : "nullable" + - name : "transfercd" + type : "string" + description : "Transfer?" + mode : "nullable" + - name : "agremkpaycd" + type : "string" + description : "Agree to make pay?" + mode : "nullable" + - name : "exceptactsind" + type : "string" + description : "Acts fail to qualify under section 53.4941(d)-3?" + mode : "nullable" + - name : "prioractvcd" + type : "string" + description : "Engage in acts in prior year?" + mode : "nullable" + - name : "undistrinccd" + type : "string" + description : "Undistributed income?" + mode : "nullable" + - name : "applyprovind" + type : "string" + description : "Not applying section 4942(a)(2) provisions?" + mode : "nullable" + - name : "dirindirintcd" + type : "string" + description : "Direct indirect interest?" + mode : "nullable" + - name : "excesshldcd" + type : "string" + description : "Excess business holdings?" + mode : "nullable" + - name : "invstjexmptcd" + type : "string" + description : "Jeopardizing investments?" 
+ mode : "nullable" + - name : "prevjexmptcd" + type : "string" + description : "Prior year jeopardizing investments?" + mode : "nullable" + - name : "propgndacd" + type : "string" + description : "Propaganda?" + mode : "nullable" + - name : "ipubelectcd" + type : "string" + description : "Influence public election?" + mode : "nullable" + - name : "grntindivcd" + type : "string" + description : "Grant individual?" + mode : "nullable" + - name : "nchrtygrntcd" + type : "string" + description : "Non-charity grant?" + mode : "nullable" + - name : "nreligiouscd" + type : "string" + description : "Non-religious?" + mode : "nullable" + - name : "excptransind" + type : "string" + description : "Transactions fail to qualify under section 53.4945?" + mode : "nullable" + - name : "rfprsnlbnftind" + type : "string" + description : "Receive funds to pay premiums on personal benefit contract?" + mode : "nullable" + - name : "pyprsnlbnftind" + type : "string" + description : "Pay premiums on personal benefit contract?" 
+ mode : "nullable" + - name : "tfairmrktunuse" + type : "integer" + description : "Fair market value of assets not used for charitable purposes" + mode : "nullable" + - name : "valncharitassets" + type : "integer" + description : "Net value of noncharitable-use assets" + mode : "nullable" + - name : "cmpmininvstret" + type : "integer" + description : "Minimum investment return" + mode : "nullable" + - name : "distribamt" + type : "integer" + description : "Distributable amount" + mode : "nullable" + - name : "undistribincyr" + type : "integer" + description : "Undistributed income" + mode : "nullable" + - name : "adjnetinccola" + type : "integer" + description : "Adjusted net income column a" + mode : "nullable" + - name : "adjnetinccolb" + type : "integer" + description : "Adjusted net income column b" + mode : "nullable" + - name : "adjnetinccolc" + type : "integer" + description : "Adjusted net income column c" + mode : "nullable" + - name : "adjnetinccold" + type : "integer" + description : "Adjusted net income column d" + mode : "nullable" + - name : "adjnetinctot" + type : "integer" + description : "Adjusted net income total" + mode : "nullable" + - name : "qlfydistriba" + type : "integer" + description : "Qualifying distributions column a" + mode : "nullable" + - name : "qlfydistribb" + type : "integer" + description : "Qualifying distributions column b" + mode : "nullable" + - name : "qlfydistribc" + type : "integer" + description : "Qualifying distributions column c" + mode : "nullable" + - name : "qlfydistribd" + type : "integer" + description : "Qualifying distributions column d" + mode : "nullable" + - name : "qlfydistribtot" + type : "integer" + description : "Qualifying distributions total" + mode : "nullable" + - name : "valassetscola" + type : "integer" + description : "Value assets column a" + mode : "nullable" + - name : "valassetscolb" + type : "integer" + description : "Value assets column b" + mode : "nullable" + - name : "valassetscolc" + 
type : "integer" + description : "Value assets column c" + mode : "nullable" + - name : "valassetscold" + type : "integer" + description : "Value assets column d" + mode : "nullable" + - name : "valassetstot" + type : "integer" + description : "Value assets total" + mode : "nullable" + - name : "qlfyasseta" + type : "integer" + description : "Qualifying assets column a" + mode : "nullable" + - name : "qlfyassetb" + type : "integer" + description : "Qualifying assets column b" + mode : "nullable" + - name : "qlfyassetc" + type : "integer" + description : "Qualifying assets column c" + mode : "nullable" + - name : "qlfyassetd" + type : "integer" + description : "Qualifying assets column d" + mode : "nullable" + - name : "qlfyassettot" + type : "integer" + description : "Qualifying assets total" + mode : "nullable" + - name : "endwmntscola" + type : "integer" + description : "Endowments column a" + mode : "nullable" + - name : "endwmntscolb" + type : "integer" + description : "Endowments column b" + mode : "nullable" + - name : "endwmntscolc" + type : "integer" + description : "Endowments column c" + mode : "nullable" + - name : "endwmntscold" + type : "integer" + description : "Endowments column d" + mode : "nullable" + - name : "endwmntstot" + type : "integer" + description : "Endowments total" + mode : "nullable" + - name : "totsuprtcola" + type : "integer" + description : "Total support column a" + mode : "nullable" + - name : "totsuprtcolb" + type : "integer" + description : "Total support column b" + mode : "nullable" + - name : "totsuprtcolc" + type : "integer" + description : "Total support column c" + mode : "nullable" + - name : "totsuprtcold" + type : "integer" + description : "Total support column d" + mode : "nullable" + - name : "totsuprttot" + type : "integer" + description : "Total support total" + mode : "nullable" + - name : "pubsuprtcola" + type : "integer" + description : "Public support column a" + mode : "nullable" + - name : "pubsuprtcolb" + 
type : "integer" + description : "Public support column b" + mode : "nullable" + - name : "pubsuprtcolc" + type : "integer" + description : "Public support column c" + mode : "nullable" + - name : "pubsuprtcold" + type : "integer" + description : "Public support column d" + mode : "nullable" + - name : "pubsuprttot" + type : "integer" + description : "Public support total" + mode : "nullable" + - name : "grsinvstinca" + type : "integer" + description : "Gross investment income column a" + mode : "nullable" + - name : "grsinvstincb" + type : "integer" + description : "Gross investment income column b" + mode : "nullable" + - name : "grsinvstincc" + type : "integer" + description : "Gross investment income column c" + mode : "nullable" + - name : "grsinvstincd" + type : "integer" + description : "Gross investment income column d" + mode : "nullable" + - name : "grsinvstinctot" + type : "integer" + description : "Gross investment income total" + mode : "nullable" + - name : "grntapprvfut" + type : "integer" + description : "Grants approved for future payment" + mode : "nullable" + - name : "progsrvcacold" + type : "integer" + description : "Program service revenue line 1a (excluded)" + mode : "nullable" + - name : "progsrvcacole" + type : "integer" + description : "Program service revenue line 1a (exempt)" + mode : "nullable" + - name : "progsrvcbcold" + type : "integer" + description : "Program service revenue line 1b (excluded)" + mode : "nullable" + - name : "progsrvcbcole" + type : "integer" + description : "Program service revenue line 1b (exempt)" + mode : "nullable" + - name : "progsrvcccold" + type : "integer" + description : "Program service revenue line 1c (excluded)" + mode : "nullable" + - name : "progsrvcccole" + type : "integer" + description : "Program service revenue line 1c (exempt)" + mode : "nullable" + - name : "progsrvcdcold" + type : "integer" + description : "Program service revenue line 1d (excluded)" + mode : "nullable" + - name : 
"progsrvcdcole" + type : "integer" + description : "Program service revenue line 1d (exempt)" + mode : "nullable" + - name : "progsrvcecold" + type : "integer" + description : "Program service revenue line 1e (excluded)" + mode : "nullable" + - name : "progsrvcecole" + type : "integer" + description : "Program service revenue line 1e (exempt)" + mode : "nullable" + - name : "progsrvcfcold" + type : "integer" + description : "Program service revenue line 1f (excluded)" + mode : "nullable" + - name : "progsrvcfcole" + type : "integer" + description : "Program service revenue line 1f (exempt)" + mode : "nullable" + - name : "progsrvcgcold" + type : "integer" + description : "Program service revenue--fees and contracts from government line 1g (excluded)" + mode : "nullable" + - name : "progsrvcgcole" + type : "integer" + description : "Program service revenue--fees and contracts from government line 1g (exempt)" + mode : "nullable" + - name : "membershpduesd" + type : "integer" + description : "Membership dues and assessments (excluded)" + mode : "nullable" + - name : "membershpduese" + type : "integer" + description : "Membership dues and assessments (exempt)" + mode : "nullable" + - name : "intonsvngsd" + type : "integer" + description : "Interest on savings and temporary cash investments (excluded)" + mode : "nullable" + - name : "intonsvngse" + type : "integer" + description : "Interest on savings and temporary cash investments (exempt)" + mode : "nullable" + - name : "dvdndsintd" + type : "integer" + description : "Dividends and interest from securities (excluded)" + mode : "nullable" + - name : "dvdndsinte" + type : "integer" + description : "Dividends and interest from securities (exempt)" + mode : "nullable" + - name : "trnsfrcashcd" + type : "string" + description : "Transfer cash to noncharitable exempt organization?" + mode : "nullable" + - name : "trnsothasstscd" + type : "string" + description : "Transfer other assets to noncharitable exempt organization?" 
+ mode : "nullable" + - name : "salesasstscd" + type : "string" + description : "Sale of assets to noncharitable exempt organization?" + mode : "nullable" + - name : "prchsasstscd" + type : "string" + description : "Purchase of assets from noncharitable exempt organization?" + mode : "nullable" + - name : "rentlsfacltscd" + type : "string" + description : "Rental of facilities or other assets?" + mode : "nullable" + - name : "reimbrsmntscd" + type : "string" + description : "Reimbursements arrangements?" + mode : "nullable" + - name : "loansguarcd" + type : "string" + description : "Loans or other guarantees?" + mode : "nullable" + - name : "perfservicescd" + type : "string" + description : "Performance of services or membership or fundraising solicitations?" + mode : "nullable" + - name : "sharngasstscd" + type : "string" + description : "Sharing of facilities equipment mailing lists other assets or paid employees?" + mode : "nullable" + + + graph_paths: + - "irs_990_pf_2015_transform_csv >> load_irs_990_pf_2015_to_bq" + + + + + \ No newline at end of file diff --git a/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py new file mode 100644 index 000000000..a3466db9e --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py @@ -0,0 +1,1145 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from airflow import DAG +from airflow.contrib.operators import gcs_to_bq +from airflow.contrib.operators import kubernetes_pod_operator + + +default_args = { + "owner": "Google", + "depends_on_past": False, + "start_date": "2021-03-01", +} + + +with DAG( + dag_id="irs_990.irs_990_pf_2016", + default_args=default_args, + max_active_runs=1, + schedule_interval="@daily", + catchup=False, + default_view="graph", +) as dag: + + # Run CSV transform within kubernetes pod + irs_990_pf_2016_transform_csv = kubernetes_pod_operator.KubernetesPodOperator( + task_id="irs_990_pf_2016_transform_csv", + startup_timeout_seconds=600, + name="irs_990_pf_2016", + namespace="default", + image_pull_policy="Always", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}", + env_vars={ + "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextract990pf.dat", + "SOURCE_FILE": "files/data.dat", + "TARGET_FILE": "files/data_output.csv", + "TARGET_GCS_BUCKET": "{{ var.json.shared.composer_bucket }}", + "TARGET_GCS_PATH": "data/irs_990/irs_990_pf_2016/data_output.csv", + "PIPELINE_NAME": "irs_990_pf_2016", + "CSV_HEADERS": 
'["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrcd","distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","t
otsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"]', + "RENAME_MAPPINGS": '{"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": "otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": 
"totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": "orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": 
"adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": "grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": 
"salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"}', + }, + resources={"request_memory": "2G", "request_cpu": "1"}, + ) + + # Task to load CSV data to a BigQuery table + load_irs_990_pf_2016_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator( + task_id="load_irs_990_pf_2016_to_bq", + bucket="{{ var.json.shared.composer_bucket }}", + source_objects=["data/irs_990/irs_990_pf_2016/data_output.csv"], + source_format="CSV", + destination_project_dataset_table="irs_990.irs_990_pf_2016", + skip_leading_rows=1, + write_disposition="WRITE_TRUNCATE", + schema_fields=[ + { + "name": "ein", + "type": "string", + "description": "Employer Identification Number", + "mode": "required", + }, + { + "name": "elf", + "type": "string", + "description": "E-file indicator", + "mode": "nullable", + }, + { + "name": "tax_prd", + "type": "string", + "description": "Tax period (YYYYMM format)", + "mode": "nullable", + }, + { + "name": "eostatus", + "type": "string", + "description": "EO Status Code", + "mode": "nullable", + }, + { + "name": "tax_yr", + "type": "integer", + "description": "SOI Year", + "mode": "nullable", + }, + { + "name": "operatingcd", + "type": "string", + "description": "Operating foundation code", + "mode": "nullable", + }, + { + "name": "subcd", + "type": "string", + "description": "Subsection code", + "mode": "nullable", + }, + { + "name": "fairmrktvalamt", + "type": "integer", + "description": "Total assets – e-o-y fair market valu", + "mode": "nullable", + }, + { + "name": "grscontrgifts", + "type": "integer", + "description": "Contributions received", + "mode": "nullable", + }, + { + "name": "schedbind", + "type": "string", + "description": "Schedule B indicator", + "mode": "nullable", + }, + { + "name": "intrstrvnue", + "type": "integer", + "description": "Interest revenue", + "mode": "nullable", + }, 
+ { + "name": "dividndsamt", + "type": "integer", + "description": "", + "mode": "nullable", + }, + { + "name": "grsrents", + "type": "integer", + "description": "Gross rents", + "mode": "nullable", + }, + { + "name": "grsslspramt", + "type": "integer", + "description": "Gross sales price for assets", + "mode": "nullable", + }, + { + "name": "costsold", + "type": "integer", + "description": "Cost-of-goods-sold", + "mode": "nullable", + }, + { + "name": "grsprofitbus", + "type": "integer", + "description": "Gross profit", + "mode": "nullable", + }, + { + "name": "otherincamt", + "type": "integer", + "description": "Other income", + "mode": "nullable", + }, + { + "name": "totrcptperbks", + "type": "integer", + "description": "Total revenue", + "mode": "nullable", + }, + { + "name": "compofficers", + "type": "integer", + "description": "Compensation of officers", + "mode": "nullable", + }, + { + "name": "pensplemplbenf", + "type": "integer", + "description": "Pension plans employee benefits", + "mode": "nullable", + }, + { + "name": "legalfeesamt", + "type": "integer", + "description": "Legal fees", + "mode": "nullable", + }, + { + "name": "accountingfees", + "type": "integer", + "description": "Accounting fees", + "mode": "nullable", + }, + { + "name": "interestamt", + "type": "integer", + "description": "Interest", + "mode": "nullable", + }, + { + "name": "depreciationamt", + "type": "integer", + "description": "Depreciation and depletion", + "mode": "nullable", + }, + { + "name": "occupancyamt", + "type": "integer", + "description": "Occupancy", + "mode": "nullable", + }, + { + "name": "travlconfmtngs", + "type": "integer", + "description": "Travel conferences and meetings", + "mode": "nullable", + }, + { + "name": "printingpubl", + "type": "integer", + "description": "Printing and publications", + "mode": "nullable", + }, + { + "name": "topradmnexpnsa", + "type": "integer", + "description": "Total operating and administrative expenses column a", + "mode": 
"nullable", + }, + { + "name": "contrpdpbks", + "type": "integer", + "description": "Contributions gifts grants paid", + "mode": "nullable", + }, + { + "name": "totexpnspbks", + "type": "integer", + "description": "Total expenses", + "mode": "nullable", + }, + { + "name": "excessrcpts", + "type": "integer", + "description": "Net income less deficit", + "mode": "nullable", + }, + { + "name": "totrcptnetinc", + "type": "integer", + "description": "Total receipts net investment income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsb", + "type": "integer", + "description": "Total operating and administrative expenses column b", + "mode": "nullable", + }, + { + "name": "totexpnsnetinc", + "type": "integer", + "description": "Total expenses net investment income", + "mode": "nullable", + }, + { + "name": "netinvstinc", + "type": "integer", + "description": "Net investment income", + "mode": "nullable", + }, + { + "name": "trcptadjnetinc", + "type": "integer", + "description": "Total receipts adjusted net income", + "mode": "nullable", + }, + { + "name": "totexpnsadjnet", + "type": "integer", + "description": "Total expenses adjusted net income", + "mode": "nullable", + }, + { + "name": "adjnetinc", + "type": "integer", + "description": "Adjusted net income", + "mode": "nullable", + }, + { + "name": "topradmnexpnsd", + "type": "integer", + "description": "Total operating and administrative expenses column d", + "mode": "nullable", + }, + { + "name": "totexpnsexempt", + "type": "integer", + "description": "Total expenses – exempt purpose", + "mode": "nullable", + }, + { + "name": "othrcashamt", + "type": "integer", + "description": "Cash non-interest-bearing – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstgovtoblig", + "type": "integer", + "description": "Investments in U.S. 
& state government obligations – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpstk", + "type": "integer", + "description": "Investments in corporate stock – e-o-y book value", + "mode": "nullable", + }, + { + "name": "invstcorpbnd", + "type": "integer", + "description": "Investments in corporate bonds– e-o-y book value", + "mode": "nullable", + }, + { + "name": "totinvstsec", + "type": "integer", + "description": "Total investments in securities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgloans", + "type": "integer", + "description": "Investments mortgage loans – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrinvstend", + "type": "integer", + "description": "Other investments – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrassetseoy", + "type": "integer", + "description": "Other assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totassetsend", + "type": "integer", + "description": "Total assets – e-o-y book value", + "mode": "nullable", + }, + { + "name": "mrtgnotespay", + "type": "integer", + "description": "Mortgage loans payable – e-o-y book value", + "mode": "nullable", + }, + { + "name": "othrliabltseoy", + "type": "integer", + "description": "Other liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "totliabend", + "type": "integer", + "description": "Total liabilities – e-o-y book value", + "mode": "nullable", + }, + { + "name": "tfundnworth", + "type": "integer", + "description": "Total fund net worth – e-o-y book value", + "mode": "nullable", + }, + { + "name": "fairmrktvaleoy", + "type": "integer", + "description": "Total assets – e-o-y fair market value", + "mode": "nullable", + }, + { + "name": "totexcapgnls", + "type": "integer", + "description": "Capital gain net income", + "mode": "nullable", + }, + { + "name": "totexcapgn", + "type": "integer", + "description": "Net gain – sales of assets", + "mode": "nullable", + }, + { + 
"name": "totexcapls", + "type": "integer", + "description": "Net loss – sales of assets", + "mode": "nullable", + }, + { + "name": "invstexcisetx", + "type": "integer", + "description": "Excise tax on net investment income", + "mode": "nullable", + }, + { + "name": "sec4940notxcd", + "type": "string", + "description": "Section 4940 – no tax", + "mode": "nullable", + }, + { + "name": "sec4940redtxcd", + "type": "string", + "description": "Section 4940 – 1 % tax", + "mode": "nullable", + }, + { + "name": "sect511tx", + "type": "integer", + "description": "Section 511 tax", + "mode": "nullable", + }, + { + "name": "subtitleatx", + "type": "integer", + "description": "Subtitle A tax", + "mode": "nullable", + }, + { + "name": "totaxpyr", + "type": "integer", + "description": "Total excise tax", + "mode": "nullable", + }, + { + "name": "esttaxcr", + "type": "integer", + "description": "Estimated tax credit", + "mode": "nullable", + }, + { + "name": "txwithldsrc", + "type": "integer", + "description": "Tax withheld at source", + "mode": "nullable", + }, + { + "name": "txpaidf2758", + "type": "integer", + "description": "Tax paid with Form 2758 (filing extension)", + "mode": "nullable", + }, + { + "name": "erronbkupwthld", + "type": "integer", + "description": "Erroneous backup withholding credit amount", + "mode": "nullable", + }, + { + "name": "estpnlty", + "type": "integer", + "description": "Estimated tax penalty", + "mode": "nullable", + }, + { + "name": "taxdue", + "type": "integer", + "description": "Tax due", + "mode": "nullable", + }, + { + "name": "overpay", + "type": "integer", + "description": "Overpayment", + "mode": "nullable", + }, + { + "name": "crelamt", + "type": "integer", + "description": "Credit elect amount", + "mode": "nullable", + }, + { + "name": "infleg", + "type": "string", + "description": "Influence legislation?", + "mode": "nullable", + }, + { + "name": "actnotpr", + "type": "string", + "description": "Activities not previously reported?", + 
"mode": "nullable", + }, + { + "name": "chgnprvrptcd", + "type": "string", + "description": "Changes not previously reported?", + "mode": "nullable", + }, + { + "name": "filedf990tcd", + "type": "string", + "description": "Filed 990-T?", + "mode": "nullable", + }, + { + "name": "contractncd", + "type": "string", + "description": "Contraction?", + "mode": "nullable", + }, + { + "name": "furnishcpycd", + "type": "string", + "description": "Furnished copy to Attorney General?", + "mode": "nullable", + }, + { + "name": "claimstatcd", + "type": "string", + "description": "Claiming status?", + "mode": "nullable", + }, + { + "name": "cntrbtrstxyrcd", + "type": "string", + "description": "Substantial contributors?", + "mode": "nullable", + }, + { + "name": "distribdafcd", + "type": "string", + "description": "Distribution to donor advised fund with advisory privileges?", + "mode": "nullable", + }, + { + "name": "orgcmplypubcd", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "filedlf1041ind", + "type": "string", + "description": "Comply with public inspection?", + "mode": "nullable", + }, + { + "name": "propexchcd", + "type": "string", + "description": "Property exchange?", + "mode": "nullable", + }, + { + "name": "brwlndmnycd", + "type": "string", + "description": "Borrow lend money?", + "mode": "nullable", + }, + { + "name": "furngoodscd", + "type": "string", + "description": "Furnished goods?", + "mode": "nullable", + }, + { + "name": "paidcmpncd", + "type": "string", + "description": "Paid compensation?", + "mode": "nullable", + }, + { + "name": "transfercd", + "type": "string", + "description": "Transfer?", + "mode": "nullable", + }, + { + "name": "agremkpaycd", + "type": "string", + "description": "Agree to make pay?", + "mode": "nullable", + }, + { + "name": "exceptactsind", + "type": "string", + "description": "Acts fail to qualify under section 53.4941(d)-3?", + "mode": "nullable", + }, + { + "name": 
"prioractvcd", + "type": "string", + "description": "Engage in acts in prior year?", + "mode": "nullable", + }, + { + "name": "undistrinccd", + "type": "string", + "description": "Undistributed income?", + "mode": "nullable", + }, + { + "name": "applyprovind", + "type": "string", + "description": "Not applying section 4942(a)(2) provisions?", + "mode": "nullable", + }, + { + "name": "dirindirintcd", + "type": "string", + "description": "Direct indirect interest?", + "mode": "nullable", + }, + { + "name": "excesshldcd", + "type": "string", + "description": "Excess business holdings?", + "mode": "nullable", + }, + { + "name": "invstjexmptcd", + "type": "string", + "description": "Jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "prevjexmptcd", + "type": "string", + "description": "Prior year jeopardizing investments?", + "mode": "nullable", + }, + { + "name": "propgndacd", + "type": "string", + "description": "Propaganda?", + "mode": "nullable", + }, + { + "name": "ipubelectcd", + "type": "string", + "description": "Influence public election?", + "mode": "nullable", + }, + { + "name": "grntindivcd", + "type": "string", + "description": "Grant individual?", + "mode": "nullable", + }, + { + "name": "nchrtygrntcd", + "type": "string", + "description": "Non-charity grant?", + "mode": "nullable", + }, + { + "name": "nreligiouscd", + "type": "string", + "description": "Non-religious?", + "mode": "nullable", + }, + { + "name": "excptransind", + "type": "string", + "description": "Transactions fail to qualify under section 53.4945?", + "mode": "nullable", + }, + { + "name": "rfprsnlbnftind", + "type": "string", + "description": "Receive funds to pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "pyprsnlbnftind", + "type": "string", + "description": "Pay premiums on personal benefit contract?", + "mode": "nullable", + }, + { + "name": "tfairmrktunuse", + "type": "integer", + "description": "Fair market value of assets 
not used for charitable purposes", + "mode": "nullable", + }, + { + "name": "valncharitassets", + "type": "integer", + "description": "Net value of noncharitable-use assets", + "mode": "nullable", + }, + { + "name": "cmpmininvstret", + "type": "integer", + "description": "Minimum investment return", + "mode": "nullable", + }, + { + "name": "distribamt", + "type": "integer", + "description": "Distributable amount", + "mode": "nullable", + }, + { + "name": "undistribincyr", + "type": "integer", + "description": "Undistributed income", + "mode": "nullable", + }, + { + "name": "adjnetinccola", + "type": "integer", + "description": "Adjusted net income column a", + "mode": "nullable", + }, + { + "name": "adjnetinccolb", + "type": "integer", + "description": "Adjusted net income column b", + "mode": "nullable", + }, + { + "name": "adjnetinccolc", + "type": "integer", + "description": "Adjusted net income column c", + "mode": "nullable", + }, + { + "name": "adjnetinccold", + "type": "integer", + "description": "Adjusted net income column d", + "mode": "nullable", + }, + { + "name": "adjnetinctot", + "type": "integer", + "description": "Adjusted net income total", + "mode": "nullable", + }, + { + "name": "qlfydistriba", + "type": "integer", + "description": "Qualifying distributions column a", + "mode": "nullable", + }, + { + "name": "qlfydistribb", + "type": "integer", + "description": "Qualifying distributions column b", + "mode": "nullable", + }, + { + "name": "qlfydistribc", + "type": "integer", + "description": "Qualifying distributions column c", + "mode": "nullable", + }, + { + "name": "qlfydistribd", + "type": "integer", + "description": "Qualifying distributions column d", + "mode": "nullable", + }, + { + "name": "qlfydistribtot", + "type": "integer", + "description": "Qualifying distributions total", + "mode": "nullable", + }, + { + "name": "valassetscola", + "type": "integer", + "description": "Value assets column a", + "mode": "nullable", + }, + { + "name": 
"valassetscolb", + "type": "integer", + "description": "Value assets column b", + "mode": "nullable", + }, + { + "name": "valassetscolc", + "type": "integer", + "description": "Value assets column c", + "mode": "nullable", + }, + { + "name": "valassetscold", + "type": "integer", + "description": "Value assets column d", + "mode": "nullable", + }, + { + "name": "valassetstot", + "type": "integer", + "description": "Value assets total", + "mode": "nullable", + }, + { + "name": "qlfyasseta", + "type": "integer", + "description": "Qualifying assets column a", + "mode": "nullable", + }, + { + "name": "qlfyassetb", + "type": "integer", + "description": "Qualifying assets column b", + "mode": "nullable", + }, + { + "name": "qlfyassetc", + "type": "integer", + "description": "Qualifying assets column c", + "mode": "nullable", + }, + { + "name": "qlfyassetd", + "type": "integer", + "description": "Qualifying assets column d", + "mode": "nullable", + }, + { + "name": "qlfyassettot", + "type": "integer", + "description": "Qualifying assets total", + "mode": "nullable", + }, + { + "name": "endwmntscola", + "type": "integer", + "description": "Endowments column a", + "mode": "nullable", + }, + { + "name": "endwmntscolb", + "type": "integer", + "description": "Endowments column b", + "mode": "nullable", + }, + { + "name": "endwmntscolc", + "type": "integer", + "description": "Endowments column c", + "mode": "nullable", + }, + { + "name": "endwmntscold", + "type": "integer", + "description": "Endowments column d", + "mode": "nullable", + }, + { + "name": "endwmntstot", + "type": "integer", + "description": "Endowments total", + "mode": "nullable", + }, + { + "name": "totsuprtcola", + "type": "integer", + "description": "Total support column a", + "mode": "nullable", + }, + { + "name": "totsuprtcolb", + "type": "integer", + "description": "Total support column b", + "mode": "nullable", + }, + { + "name": "totsuprtcolc", + "type": "integer", + "description": "Total support column 
c", + "mode": "nullable", + }, + { + "name": "totsuprtcold", + "type": "integer", + "description": "Total support column d", + "mode": "nullable", + }, + { + "name": "totsuprttot", + "type": "integer", + "description": "Total support total", + "mode": "nullable", + }, + { + "name": "pubsuprtcola", + "type": "integer", + "description": "Public support column a", + "mode": "nullable", + }, + { + "name": "pubsuprtcolb", + "type": "integer", + "description": "Public support column b", + "mode": "nullable", + }, + { + "name": "pubsuprtcolc", + "type": "integer", + "description": "Public support column c", + "mode": "nullable", + }, + { + "name": "pubsuprtcold", + "type": "integer", + "description": "Public support column d", + "mode": "nullable", + }, + { + "name": "pubsuprttot", + "type": "integer", + "description": "Public support total", + "mode": "nullable", + }, + { + "name": "grsinvstinca", + "type": "integer", + "description": "Gross investment income column a", + "mode": "nullable", + }, + { + "name": "grsinvstincb", + "type": "integer", + "description": "Gross investment income column b", + "mode": "nullable", + }, + { + "name": "grsinvstincc", + "type": "integer", + "description": "Gross investment income column c", + "mode": "nullable", + }, + { + "name": "grsinvstincd", + "type": "integer", + "description": "Gross investment income column d", + "mode": "nullable", + }, + { + "name": "grsinvstinctot", + "type": "integer", + "description": "Gross investment income total", + "mode": "nullable", + }, + { + "name": "grntapprvfut", + "type": "integer", + "description": "Grants approved for future payment", + "mode": "nullable", + }, + { + "name": "progsrvcacold", + "type": "integer", + "description": "Program service revenue line 1a (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcacole", + "type": "integer", + "description": "Program service revenue line 1a (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcbcold", + "type": "integer", + 
"description": "Program service revenue line 1b (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcbcole", + "type": "integer", + "description": "Program service revenue line 1b (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcccold", + "type": "integer", + "description": "Program service revenue line 1c (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcccole", + "type": "integer", + "description": "Program service revenue line 1c (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcdcold", + "type": "integer", + "description": "Program service revenue line 1d (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcdcole", + "type": "integer", + "description": "Program service revenue line 1d (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcecold", + "type": "integer", + "description": "Program service revenue line 1e (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcecole", + "type": "integer", + "description": "Program service revenue line 1e (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcfcold", + "type": "integer", + "description": "Program service revenue line 1f (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcfcole", + "type": "integer", + "description": "Program service revenue line 1f (exempt)", + "mode": "nullable", + }, + { + "name": "progsrvcgcold", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (excluded)", + "mode": "nullable", + }, + { + "name": "progsrvcgcole", + "type": "integer", + "description": "Program service revenue--fees and contracts from government line 1g (exempt)", + "mode": "nullable", + }, + { + "name": "membershpduesd", + "type": "integer", + "description": "Membership dues and assessments (excluded)", + "mode": "nullable", + }, + { + "name": "membershpduese", + "type": "integer", + "description": "Membership dues and assessments (exempt)", + "mode": "nullable", + }, + { + 
"name": "intonsvngsd", + "type": "integer", + "description": "Interest on savings and temporary cash investments (excluded)", + "mode": "nullable", + }, + { + "name": "intonsvngse", + "type": "integer", + "description": "Interest on savings and temporary cash investments (exempt)", + "mode": "nullable", + }, + { + "name": "dvdndsintd", + "type": "integer", + "description": "Dividends and interest from securities (excluded)", + "mode": "nullable", + }, + { + "name": "dvdndsinte", + "type": "integer", + "description": "Dividends and interest from securities (exempt)", + "mode": "nullable", + }, + { + "name": "trnsfrcashcd", + "type": "string", + "description": "Transfer cash to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "trnsothasstscd", + "type": "string", + "description": "Transfer other assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "salesasstscd", + "type": "string", + "description": "Sale of assets to noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "prchsasstscd", + "type": "string", + "description": "Purchase of assets from noncharitable exempt organization?", + "mode": "nullable", + }, + { + "name": "rentlsfacltscd", + "type": "string", + "description": "Rental of facilities or other assets?", + "mode": "nullable", + }, + { + "name": "reimbrsmntscd", + "type": "string", + "description": "Reimbursements arrangements?", + "mode": "nullable", + }, + { + "name": "loansguarcd", + "type": "string", + "description": "Loans or other guarantees?", + "mode": "nullable", + }, + { + "name": "perfservicescd", + "type": "string", + "description": "Performance of services or membership or fundraising solicitations?", + "mode": "nullable", + }, + { + "name": "sharngasstscd", + "type": "string", + "description": "Sharing of facilities equipment mailing lists other assets or paid employees?", + "mode": "nullable", + }, + ], + ) + + irs_990_pf_2016_transform_csv >> 
load_irs_990_pf_2016_to_bq diff --git a/datasets/irs_990/irs_990_pf_2016/pipeline.yaml b/datasets/irs_990/irs_990_pf_2016/pipeline.yaml new file mode 100644 index 000000000..26877d559 --- /dev/null +++ b/datasets/irs_990/irs_990_pf_2016/pipeline.yaml @@ -0,0 +1,833 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +resources: + + - type: bigquery_table + # Required Properties: + table_id: irs_990_pf_2016 + + # Description of the table + description: "irs_990_pf_2016 dataset" + +dag: + airflow_version: 1 + initialize: + dag_id: irs_990_pf_2016 + default_args: + owner: "Google" + + # When set to True, keeps a task from getting triggered if the previous schedule for the task hasn’t succeeded + depends_on_past: False + start_date: '2021-03-01' + max_active_runs: 1 + schedule_interval: "@daily" + catchup: False + default_view: graph + + tasks: + - operator: "KubernetesPodOperator" + + # Task description + description: "Run CSV transform within kubernetes pod" + + args: + + task_id: "irs_990_pf_2016_transform_csv" + + startup_timeout_seconds: 600 + + # The name of the pod in which the task will run. This will be used (plus a random suffix) to generate a pod id + name: "irs_990_pf_2016" + + # The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. 
use the Cloud Composer environment's resources without starving other pipelines. + namespace: "default" + + image_pull_policy: "Always" + + # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}" + + # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. + env_vars: + SOURCE_URL: "https://www.irs.gov/pub/irs-soi/16eofinextract990pf.dat" + SOURCE_FILE: "files/data.dat" + TARGET_FILE: "files/data_output.csv" + TARGET_GCS_BUCKET: "{{ var.json.shared.composer_bucket }}" + TARGET_GCS_PATH: "data/irs_990/irs_990_pf_2016/data_output.csv" + PIPELINE_NAME: "irs_990_pf_2016" + CSV_HEADERS: >- + ["ein","elf","tax_prd","eostatus","tax_yr","operatingcd","subcd","fairmrktvalamt","grscontrgifts","schedbind","intrstrvnue","dividndsamt","grsrents","grsslspramt","costsold","grsprofitbus","otherincamt","totrcptperbks","compofficers","pensplemplbenf","legalfeesamt","accountingfees","interestamt","depreciationamt","occupancyamt","travlconfmtngs","printingpubl","topradmnexpnsa","contrpdpbks","totexpnspbks","excessrcpts","totrcptnetinc","topradmnexpnsb","totexpnsnetinc","netinvstinc","trcptadjnetinc","totexpnsadjnet","adjnetinc","topradmnexpnsd","totexpnsexempt","othrcashamt","invstgovtoblig","invstcorpstk","invstcorpbnd","totinvstsec","mrtgloans","othrinvstend","othrassetseoy","totassetsend","mrtgnotespay","othrliabltseoy","totliabend","tfundnworth","fairmrktvaleoy","totexcapgnls","totexcapgn","totexcapls","invstexcisetx","sec4940notxcd","sec4940redtxcd","sect511tx","subtitleatx","totaxpyr","esttaxcr","txwithldsrc","txpaidf2758","erronbkupwthld","estpnlty","taxdue","overpay","crelamt","infleg","actnotpr","chgnprvrptcd","filedf990tcd","contractncd","furnishcpycd","claimstatcd","cntrbtrstxyrc
d","distribdafcd","orgcmplypubcd","filedlf1041ind","propexchcd","brwlndmnycd","furngoodscd","paidcmpncd","transfercd","agremkpaycd","exceptactsind","prioractvcd","undistrinccd","applyprovind","dirindirintcd","excesshldcd","invstjexmptcd","prevjexmptcd","propgndacd","ipubelectcd","grntindivcd","nchrtygrntcd","nreligiouscd","excptransind","rfprsnlbnftind","pyprsnlbnftind","tfairmrktunuse","valncharitassets","cmpmininvstret","distribamt","undistribincyr","adjnetinccola","adjnetinccolb","adjnetinccolc","adjnetinccold","adjnetinctot","qlfydistriba","qlfydistribb","qlfydistribc","qlfydistribd","qlfydistribtot","valassetscola","valassetscolb","valassetscolc","valassetscold","valassetstot","qlfyasseta","qlfyassetb","qlfyassetc","qlfyassetd","qlfyassettot","endwmntscola","endwmntscolb","endwmntscolc","endwmntscold","endwmntstot","totsuprtcola","totsuprtcolb","totsuprtcolc","totsuprtcold","totsuprttot","pubsuprtcola","pubsuprtcolb","pubsuprtcolc","pubsuprtcold","pubsuprttot","grsinvstinca","grsinvstincb","grsinvstincc","grsinvstincd","grsinvstinctot","grntapprvfut","progsrvcacold","progsrvcacole","progsrvcbcold","progsrvcbcole","progsrvcccold","progsrvcccole","progsrvcdcold","progsrvcdcole","progsrvcecold","progsrvcecole","progsrvcfcold","progsrvcfcole","progsrvcgcold","progsrvcgcole","membershpduesd","membershpduese","intonsvngsd","intonsvngse","dvdndsintd","dvdndsinte","trnsfrcashcd","trnsothasstscd","salesasstscd","prchsasstscd","rentlsfacltscd","reimbrsmntscd","loansguarcd","perfservicescd","sharngasstscd"] + RENAME_MAPPINGS: >- + {"ELF": "elf","ELFCD": "elf","EIN": "ein","TAX_PRD": "tax_prd","EOSTATUS": "eostatus","TAX_YR": "tax_yr","OPERATINGCD": "operatingcd","SUBCD": "subcd","FAIRMRKTVALAMT": "fairmrktvalamt","GRSCONTRGIFTS": "grscontrgifts","SCHEDBIND": "schedbind","INTRSTRVNUE": "intrstrvnue","DIVIDNDSAMT": "dividndsamt","GRSRENTS": "grsrents","GRSSLSPRAMT": "grsslspramt","COSTSOLD": "costsold","GRSPROFITBUS": "grsprofitbus","OTHERINCAMT": 
"otherincamt","TOTRCPTPERBKS": "totrcptperbks","COMPOFFICERS": "compofficers","PENSPLEMPLBENF": "pensplemplbenf","LEGALFEESAMT": "legalfeesamt","ACCOUNTINGFEES": "accountingfees","INTERESTAMT": "interestamt","DEPRECIATIONAMT": "depreciationamt","OCCUPANCYAMT": "occupancyamt","TRAVLCONFMTNGS": "travlconfmtngs","PRINTINGPUBL": "printingpubl","TOPRADMNEXPNSA": "topradmnexpnsa","CONTRPDPBKS": "contrpdpbks","TOTEXPNSPBKS": "totexpnspbks","EXCESSRCPTS": "excessrcpts","TOTRCPTNETINC": "totrcptnetinc","TOPRADMNEXPNSB": "topradmnexpnsb","TOTEXPNSNETINC": "totexpnsnetinc","NETINVSTINC": "netinvstinc","TRCPTADJNETINC": "trcptadjnetinc","TOTEXPNSADJNET": "totexpnsadjnet","ADJNETINC": "adjnetinc","TOPRADMNEXPNSD": "topradmnexpnsd","TOTEXPNSEXEMPT": "totexpnsexempt","OTHRCASHAMT": "othrcashamt","INVSTGOVTOBLIG": "invstgovtoblig","INVSTCORPSTK": "invstcorpstk","INVSTCORPBND": "invstcorpbnd","TOTINVSTSEC": "totinvstsec","MRTGLOANS": "mrtgloans","OTHRINVSTEND": "othrinvstend","OTHRASSETSEOY": "othrassetseoy","TOTASSETSEND": "totassetsend","MRTGNOTESPAY": "mrtgnotespay","OTHRLIABLTSEOY": "othrliabltseoy","TOTLIABEND": "totliabend","TFUNDNWORTH": "tfundnworth","FAIRMRKTVALEOY": "fairmrktvaleoy","TOTEXCAPGNLS": "totexcapgnls","TOTEXCAPGN": "totexcapgn","TOTEXCAPLS": "totexcapls","INVSTEXCISETX": "invstexcisetx","SEC4940NOTXCD": "sec4940notxcd","SEC4940REDTXCD": "sec4940redtxcd","SECT511TX": "sect511tx","SUBTITLEATX": "subtitleatx","TOTAXPYR": "totaxpyr","ESTTAXCR": "esttaxcr","TXWITHLDSRC": "txwithldsrc","TXPAIDF2758": "txpaidf2758","ERRONBKUPWTHLD": "erronbkupwthld","ESTPNLTY": "estpnlty","TAXDUE": "taxdue","OVERPAY": "overpay","CRELAMT": "crelamt","INFLEG": "infleg","ACTNOTPR": "actnotpr","CHGNPRVRPTCD": "chgnprvrptcd","FILEDF990TCD": "filedf990tcd","CONTRACTNCD": "contractncd","FURNISHCPYCD": "furnishcpycd","CLAIMSTATCD": "claimstatcd","CNTRBTRSTXYRCD": "cntrbtrstxyrcd","DISTRIBDAFCD": "distribdafcd","ACQDRINDRINTCD": "distribdafcd","ORGCMPLYPUBCD": 
"orgcmplypubcd","FILEDLF1041IND": "filedlf1041ind","PROPEXCHCD": "propexchcd","BRWLNDMNYCD": "brwlndmnycd","FURNGOODSCD": "furngoodscd","PAIDCMPNCD": "paidcmpncd","TRANSFERCD": "transfercd","AGREMKPAYCD": "agremkpaycd","EXCEPTACTSIND": "exceptactsind","PRIORACTVCD": "prioractvcd","UNDISTRINCCD": "undistrinccd","APPLYPROVIND": "applyprovind","DIRINDIRINTCD": "dirindirintcd","EXCESSHLDCD": "excesshldcd","INVSTJEXMPTCD": "invstjexmptcd","PREVJEXMPTCD": "prevjexmptcd","PROPGNDACD": "propgndacd","IPUBELECTCD": "ipubelectcd","GRNTINDIVCD": "grntindivcd","NCHRTYGRNTCD": "nchrtygrntcd","NRELIGIOUSCD": "nreligiouscd","EXCPTRANSIND": "excptransind","RFPRSNLBNFTIND": "rfprsnlbnftind","PYPRSNLBNFTIND": "pyprsnlbnftind","TFAIRMRKTUNUSE": "tfairmrktunuse","VALNCHARITASSETS": "valncharitassets","CMPMININVSTRET": "cmpmininvstret","DISTRIBAMT": "distribamt","UNDISTRIBINCYR": "undistribincyr","ADJNETINCCOLA": "adjnetinccola","ADJNETINCCOLB": "adjnetinccolb","ADJNETINCCOLC": "adjnetinccolc","ADJNETINCCOLD": "adjnetinccold","ADJNETINCTOT": "adjnetinctot","QLFYDISTRIBA": "qlfydistriba","QLFYDISTRIBB": "qlfydistribb","QLFYDISTRIBC": "qlfydistribc","QLFYDISTRIBD": "qlfydistribd","QLFYDISTRIBTOT": "qlfydistribtot","VALASSETSCOLA": "valassetscola","VALASSETSCOLB": "valassetscolb","VALASSETSCOLC": "valassetscolc","VALASSETSCOLD": "valassetscold","VALASSETSTOT": "valassetstot","QLFYASSETA": "qlfyasseta","QLFYASSETB": "qlfyassetb","QLFYASSETC": "qlfyassetc","QLFYASSETD": "qlfyassetd","QLFYASSETTOT": "qlfyassettot","ENDWMNTSCOLA": "endwmntscola","ENDWMNTSCOLB": "endwmntscolb","ENDWMNTSCOLC": "endwmntscolc","ENDWMNTSCOLD": "endwmntscold","ENDWMNTSTOT": "endwmntstot","TOTSUPRTCOLA": "totsuprtcola","TOTSUPRTCOLB": "totsuprtcolb","TOTSUPRTCOLC": "totsuprtcolc","TOTSUPRTCOLD": "totsuprtcold","TOTSUPRTTOT": "totsuprttot","PUBSUPRTCOLA": "pubsuprtcola","PUBSUPRTCOLB": "pubsuprtcolb","PUBSUPRTCOLC": "pubsuprtcolc","PUBSUPRTCOLD": "pubsuprtcold","PUBSUPRTTOT": "pubsuprttot","GRSINVSTINCA": 
"grsinvstinca","GRSINVSTINCB": "grsinvstincb","GRSINVSTINCC": "grsinvstincc","GRSINVSTINCD": "grsinvstincd","GRSINVSTINCTOT": "grsinvstinctot","GRNTAPPRVFUT": "grntapprvfut","PROGSRVCACOLD": "progsrvcacold","PROGSRVCACOLE": "progsrvcacole","PROGSRVCBCOLD": "progsrvcbcold","PROGSRVCBCOLE": "progsrvcbcole","PROGSRVCCCOLD": "progsrvcccold","PROGSRVCCCOLE": "progsrvcccole","PROGSRVCDCOLD": "progsrvcdcold","PROGSRVCDCOLE": "progsrvcdcole","PROGSRVCECOLD": "progsrvcecold","PROGSRVCECOLE": "progsrvcecole","PROGSRVCFCOLD": "progsrvcfcold","PROGSRVCFCOLE": "progsrvcfcole","PROGSRVCGCOLD": "progsrvcgcold","PROGSRVCGCOLE": "progsrvcgcole","MEMBERSHPDUESD": "membershpduesd","MEMBERSHPDUESE": "membershpduese","INTONSVNGSD": "intonsvngsd","INTONSVNGSE": "intonsvngse","DVDNDSINTD": "dvdndsintd","DVDNDSINTE": "dvdndsinte","TRNSFRCASHCD": "trnsfrcashcd","TRNSOTHASSTSCD": "trnsothasstscd","SALESASSTSCD": "salesasstscd","PRCHSASSTSCD": "prchsasstscd","RENTLSFACLTSCD": "rentlsfacltscd","REIMBRSMNTSCD": "reimbrsmntscd","LOANSGUARCD": "loansguarcd","PERFSERVICESCD": "perfservicescd","SHARNGASSTSCD": "sharngasstscd"} + + + # Set resource limits for the pod here. For resource units in Kubernetes, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes + resources: + request_memory: "2G" + request_cpu: "1" + + - operator: "GoogleCloudStorageToBigQueryOperator" + description: "Task to load CSV data to a BigQuery table" + + args: + task_id: "load_irs_990_pf_2016_to_bq" + + # The GCS bucket where the CSV file is located in. 
+ bucket: "{{ var.json.shared.composer_bucket }}" + + # The GCS object path for the CSV file + source_objects: ["data/irs_990/irs_990_pf_2016/data_output.csv"] + source_format: "CSV" + destination_project_dataset_table: "irs_990.irs_990_pf_2016" + + # Use this if your CSV file contains a header row + skip_leading_rows: 1 + + # How to write data to the table: overwrite, append, or write if empty + # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition + write_disposition: "WRITE_TRUNCATE" + + # The BigQuery table schema based on the CSV file. For more info, see + # https://cloud.google.com/bigquery/docs/schemas. + # Always use snake_case and lowercase for column names, and be explicit, + # i.e. specify modes for all columns. + + schema_fields: + - name : "ein" + type : "string" + description : "Employer Identification Number" + mode : "required" + - name : "elf" + type : "string" + description : "E-file indicator" + mode : "nullable" + - name : "tax_prd" + type : "string" + description : "Tax period (YYYYMM format)" + mode : "nullable" + - name : "eostatus" + type : "string" + description : "EO Status Code" + mode : "nullable" + - name : "tax_yr" + type : "integer" + description : "SOI Year" + mode : "nullable" + - name : "operatingcd" + type : "string" + description : "Operating foundation code" + mode : "nullable" + - name : "subcd" + type : "string" + description : "Subsection code" + mode : "nullable" + - name : "fairmrktvalamt" + type : "integer" + description : "Total assets – e-o-y fair market value" + mode : "nullable" + - name : "grscontrgifts" + type : "integer" + description : "Contributions received" + mode : "nullable" + - name : "schedbind" + type : "string" + description : "Schedule B indicator" + mode : "nullable" + - name : "intrstrvnue" + type : "integer" + description : "Interest revenue" + mode : "nullable" + - name : "dividndsamt" + type : "integer" + description : "" + mode : "nullable" + - name :
"grsrents" + type : "integer" + description : "Gross rents" + mode : "nullable" + - name : "grsslspramt" + type : "integer" + description : "Gross sales price for assets" + mode : "nullable" + - name : "costsold" + type : "integer" + description : "Cost-of-goods-sold" + mode : "nullable" + - name : "grsprofitbus" + type : "integer" + description : "Gross profit" + mode : "nullable" + - name : "otherincamt" + type : "integer" + description : "Other income" + mode : "nullable" + - name : "totrcptperbks" + type : "integer" + description : "Total revenue" + mode : "nullable" + - name : "compofficers" + type : "integer" + description : "Compensation of officers" + mode : "nullable" + - name : "pensplemplbenf" + type : "integer" + description : "Pension plans employee benefits" + mode : "nullable" + - name : "legalfeesamt" + type : "integer" + description : "Legal fees" + mode : "nullable" + - name : "accountingfees" + type : "integer" + description : "Accounting fees" + mode : "nullable" + - name : "interestamt" + type : "integer" + description : "Interest" + mode : "nullable" + - name : "depreciationamt" + type : "integer" + description : "Depreciation and depletion" + mode : "nullable" + - name : "occupancyamt" + type : "integer" + description : "Occupancy" + mode : "nullable" + - name : "travlconfmtngs" + type : "integer" + description : "Travel conferences and meetings" + mode : "nullable" + - name : "printingpubl" + type : "integer" + description : "Printing and publications" + mode : "nullable" + - name : "topradmnexpnsa" + type : "integer" + description : "Total operating and administrative expenses column a" + mode : "nullable" + - name : "contrpdpbks" + type : "integer" + description : "Contributions gifts grants paid" + mode : "nullable" + - name : "totexpnspbks" + type : "integer" + description : "Total expenses" + mode : "nullable" + - name : "excessrcpts" + type : "integer" + description : "Net income less deficit" + mode : "nullable" + - name : 
"totrcptnetinc" + type : "integer" + description : "Total receipts net investment income" + mode : "nullable" + - name : "topradmnexpnsb" + type : "integer" + description : "Total operating and administrative expenses column b" + mode : "nullable" + - name : "totexpnsnetinc" + type : "integer" + description : "Total expenses net investment income" + mode : "nullable" + - name : "netinvstinc" + type : "integer" + description : "Net investment income" + mode : "nullable" + - name : "trcptadjnetinc" + type : "integer" + description : "Total receipts adjusted net income" + mode : "nullable" + - name : "totexpnsadjnet" + type : "integer" + description : "Total expenses adjusted net income" + mode : "nullable" + - name : "adjnetinc" + type : "integer" + description : "Adjusted net income" + mode : "nullable" + - name : "topradmnexpnsd" + type : "integer" + description : "Total operating and administrative expenses column d" + mode : "nullable" + - name : "totexpnsexempt" + type : "integer" + description : "Total expenses – exempt purpose" + mode : "nullable" + - name : "othrcashamt" + type : "integer" + description : "Cash non-interest-bearing – e-o-y book value" + mode : "nullable" + - name : "invstgovtoblig" + type : "integer" + description : "Investments in U.S. 
& state government obligations – e-o-y book value" + mode : "nullable" + - name : "invstcorpstk" + type : "integer" + description : "Investments in corporate stock – e-o-y book value" + mode : "nullable" + - name : "invstcorpbnd" + type : "integer" + description : "Investments in corporate bonds– e-o-y book value" + mode : "nullable" + - name : "totinvstsec" + type : "integer" + description : "Total investments in securities – e-o-y book value" + mode : "nullable" + - name : "mrtgloans" + type : "integer" + description : "Investments mortgage loans – e-o-y book value" + mode : "nullable" + - name : "othrinvstend" + type : "integer" + description : "Other investments – e-o-y book value" + mode : "nullable" + - name : "othrassetseoy" + type : "integer" + description : "Other assets – e-o-y book value" + mode : "nullable" + - name : "totassetsend" + type : "integer" + description : "Total assets – e-o-y book value" + mode : "nullable" + - name : "mrtgnotespay" + type : "integer" + description : "Mortgage loans payable – e-o-y book value" + mode : "nullable" + - name : "othrliabltseoy" + type : "integer" + description : "Other liabilities – e-o-y book value" + mode : "nullable" + - name : "totliabend" + type : "integer" + description : "Total liabilities – e-o-y book value" + mode : "nullable" + - name : "tfundnworth" + type : "integer" + description : "Total fund net worth – e-o-y book value" + mode : "nullable" + - name : "fairmrktvaleoy" + type : "integer" + description : "Total assets – e-o-y fair market value" + mode : "nullable" + - name : "totexcapgnls" + type : "integer" + description : "Capital gain net income" + mode : "nullable" + - name : "totexcapgn" + type : "integer" + description : "Net gain – sales of assets" + mode : "nullable" + - name : "totexcapls" + type : "integer" + description : "Net loss – sales of assets" + mode : "nullable" + - name : "invstexcisetx" + type : "integer" + description : "Excise tax on net investment income" + mode : "nullable" 
+ - name : "sec4940notxcd" + type : "string" + description : "Section 4940 – no tax" + mode : "nullable" + - name : "sec4940redtxcd" + type : "string" + description : "Section 4940 – 1 % tax" + mode : "nullable" + - name : "sect511tx" + type : "integer" + description : "Section 511 tax" + mode : "nullable" + - name : "subtitleatx" + type : "integer" + description : "Subtitle A tax" + mode : "nullable" + - name : "totaxpyr" + type : "integer" + description : "Total excise tax" + mode : "nullable" + - name : "esttaxcr" + type : "integer" + description : "Estimated tax credit" + mode : "nullable" + - name : "txwithldsrc" + type : "integer" + description : "Tax withheld at source" + mode : "nullable" + - name : "txpaidf2758" + type : "integer" + description : "Tax paid with Form 2758 (filing extension)" + mode : "nullable" + - name : "erronbkupwthld" + type : "integer" + description : "Erroneous backup withholding credit amount" + mode : "nullable" + - name : "estpnlty" + type : "integer" + description : "Estimated tax penalty" + mode : "nullable" + - name : "taxdue" + type : "integer" + description : "Tax due" + mode : "nullable" + - name : "overpay" + type : "integer" + description : "Overpayment" + mode : "nullable" + - name : "crelamt" + type : "integer" + description : "Credit elect amount" + mode : "nullable" + - name : "infleg" + type : "string" + description : "Influence legislation?" + mode : "nullable" + - name : "actnotpr" + type : "string" + description : "Activities not previously reported?" + mode : "nullable" + - name : "chgnprvrptcd" + type : "string" + description : "Changes not previously reported?" + mode : "nullable" + - name : "filedf990tcd" + type : "string" + description : "Filed 990-T?" + mode : "nullable" + - name : "contractncd" + type : "string" + description : "Contraction?" + mode : "nullable" + - name : "furnishcpycd" + type : "string" + description : "Furnished copy to Attorney General?" 
+ mode : "nullable" + - name : "claimstatcd" + type : "string" + description : "Claiming status?" + mode : "nullable" + - name : "cntrbtrstxyrcd" + type : "string" + description : "Substantial contributors?" + mode : "nullable" + - name : "distribdafcd" + type : "string" + description : "Distribution to donor advised fund with advisory privileges?" + mode : "nullable" + - name : "orgcmplypubcd" + type : "string" + description : "Comply with public inspection?" + mode : "nullable" + - name : "filedlf1041ind" + type : "string" + description : "Comply with public inspection?" + mode : "nullable" + - name : "propexchcd" + type : "string" + description : "Property exchange?" + mode : "nullable" + - name : "brwlndmnycd" + type : "string" + description : "Borrow lend money?" + mode : "nullable" + - name : "furngoodscd" + type : "string" + description : "Furnished goods?" + mode : "nullable" + - name : "paidcmpncd" + type : "string" + description : "Paid compensation?" + mode : "nullable" + - name : "transfercd" + type : "string" + description : "Transfer?" + mode : "nullable" + - name : "agremkpaycd" + type : "string" + description : "Agree to make pay?" + mode : "nullable" + - name : "exceptactsind" + type : "string" + description : "Acts fail to qualify under section 53.4941(d)-3?" + mode : "nullable" + - name : "prioractvcd" + type : "string" + description : "Engage in acts in prior year?" + mode : "nullable" + - name : "undistrinccd" + type : "string" + description : "Undistributed income?" + mode : "nullable" + - name : "applyprovind" + type : "string" + description : "Not applying section 4942(a)(2) provisions?" + mode : "nullable" + - name : "dirindirintcd" + type : "string" + description : "Direct indirect interest?" + mode : "nullable" + - name : "excesshldcd" + type : "string" + description : "Excess business holdings?" + mode : "nullable" + - name : "invstjexmptcd" + type : "string" + description : "Jeopardizing investments?" 
+ mode : "nullable" + - name : "prevjexmptcd" + type : "string" + description : "Prior year jeopardizing investments?" + mode : "nullable" + - name : "propgndacd" + type : "string" + description : "Propaganda?" + mode : "nullable" + - name : "ipubelectcd" + type : "string" + description : "Influence public election?" + mode : "nullable" + - name : "grntindivcd" + type : "string" + description : "Grant individual?" + mode : "nullable" + - name : "nchrtygrntcd" + type : "string" + description : "Non-charity grant?" + mode : "nullable" + - name : "nreligiouscd" + type : "string" + description : "Non-religious?" + mode : "nullable" + - name : "excptransind" + type : "string" + description : "Transactions fail to qualify under section 53.4945?" + mode : "nullable" + - name : "rfprsnlbnftind" + type : "string" + description : "Receive funds to pay premiums on personal benefit contract?" + mode : "nullable" + - name : "pyprsnlbnftind" + type : "string" + description : "Pay premiums on personal benefit contract?" 
+ mode : "nullable" + - name : "tfairmrktunuse" + type : "integer" + description : "Fair market value of assets not used for charitable purposes" + mode : "nullable" + - name : "valncharitassets" + type : "integer" + description : "Net value of noncharitable-use assets" + mode : "nullable" + - name : "cmpmininvstret" + type : "integer" + description : "Minimum investment return" + mode : "nullable" + - name : "distribamt" + type : "integer" + description : "Distributable amount" + mode : "nullable" + - name : "undistribincyr" + type : "integer" + description : "Undistributed income" + mode : "nullable" + - name : "adjnetinccola" + type : "integer" + description : "Adjusted net income column a" + mode : "nullable" + - name : "adjnetinccolb" + type : "integer" + description : "Adjusted net income column b" + mode : "nullable" + - name : "adjnetinccolc" + type : "integer" + description : "Adjusted net income column c" + mode : "nullable" + - name : "adjnetinccold" + type : "integer" + description : "Adjusted net income column d" + mode : "nullable" + - name : "adjnetinctot" + type : "integer" + description : "Adjusted net income total" + mode : "nullable" + - name : "qlfydistriba" + type : "integer" + description : "Qualifying distributions column a" + mode : "nullable" + - name : "qlfydistribb" + type : "integer" + description : "Qualifying distributions column b" + mode : "nullable" + - name : "qlfydistribc" + type : "integer" + description : "Qualifying distributions column c" + mode : "nullable" + - name : "qlfydistribd" + type : "integer" + description : "Qualifying distributions column d" + mode : "nullable" + - name : "qlfydistribtot" + type : "integer" + description : "Qualifying distributions total" + mode : "nullable" + - name : "valassetscola" + type : "integer" + description : "Value assets column a" + mode : "nullable" + - name : "valassetscolb" + type : "integer" + description : "Value assets column b" + mode : "nullable" + - name : "valassetscolc" + 
type : "integer" + description : "Value assets column c" + mode : "nullable" + - name : "valassetscold" + type : "integer" + description : "Value assets column d" + mode : "nullable" + - name : "valassetstot" + type : "integer" + description : "Value assets total" + mode : "nullable" + - name : "qlfyasseta" + type : "integer" + description : "Qualifying assets column a" + mode : "nullable" + - name : "qlfyassetb" + type : "integer" + description : "Qualifying assets column b" + mode : "nullable" + - name : "qlfyassetc" + type : "integer" + description : "Qualifying assets column c" + mode : "nullable" + - name : "qlfyassetd" + type : "integer" + description : "Qualifying assets column d" + mode : "nullable" + - name : "qlfyassettot" + type : "integer" + description : "Qualifying assets total" + mode : "nullable" + - name : "endwmntscola" + type : "integer" + description : "Endowments column a" + mode : "nullable" + - name : "endwmntscolb" + type : "integer" + description : "Endowments column b" + mode : "nullable" + - name : "endwmntscolc" + type : "integer" + description : "Endowments column c" + mode : "nullable" + - name : "endwmntscold" + type : "integer" + description : "Endowments column d" + mode : "nullable" + - name : "endwmntstot" + type : "integer" + description : "Endowments total" + mode : "nullable" + - name : "totsuprtcola" + type : "integer" + description : "Total support column a" + mode : "nullable" + - name : "totsuprtcolb" + type : "integer" + description : "Total support column b" + mode : "nullable" + - name : "totsuprtcolc" + type : "integer" + description : "Total support column c" + mode : "nullable" + - name : "totsuprtcold" + type : "integer" + description : "Total support column d" + mode : "nullable" + - name : "totsuprttot" + type : "integer" + description : "Total support total" + mode : "nullable" + - name : "pubsuprtcola" + type : "integer" + description : "Public support column a" + mode : "nullable" + - name : "pubsuprtcolb" + 
type : "integer" + description : "Public support column b" + mode : "nullable" + - name : "pubsuprtcolc" + type : "integer" + description : "Public support column c" + mode : "nullable" + - name : "pubsuprtcold" + type : "integer" + description : "Public support column d" + mode : "nullable" + - name : "pubsuprttot" + type : "integer" + description : "Public support total" + mode : "nullable" + - name : "grsinvstinca" + type : "integer" + description : "Gross investment income column a" + mode : "nullable" + - name : "grsinvstincb" + type : "integer" + description : "Gross investment income column b" + mode : "nullable" + - name : "grsinvstincc" + type : "integer" + description : "Gross investment income column c" + mode : "nullable" + - name : "grsinvstincd" + type : "integer" + description : "Gross investment income column d" + mode : "nullable" + - name : "grsinvstinctot" + type : "integer" + description : "Gross investment income total" + mode : "nullable" + - name : "grntapprvfut" + type : "integer" + description : "Grants approved for future payment" + mode : "nullable" + - name : "progsrvcacold" + type : "integer" + description : "Program service revenue line 1a (excluded)" + mode : "nullable" + - name : "progsrvcacole" + type : "integer" + description : "Program service revenue line 1a (exempt)" + mode : "nullable" + - name : "progsrvcbcold" + type : "integer" + description : "Program service revenue line 1b (excluded)" + mode : "nullable" + - name : "progsrvcbcole" + type : "integer" + description : "Program service revenue line 1b (exempt)" + mode : "nullable" + - name : "progsrvcccold" + type : "integer" + description : "Program service revenue line 1c (excluded)" + mode : "nullable" + - name : "progsrvcccole" + type : "integer" + description : "Program service revenue line 1c (exempt)" + mode : "nullable" + - name : "progsrvcdcold" + type : "integer" + description : "Program service revenue line 1d (excluded)" + mode : "nullable" + - name : 
"progsrvcdcole" + type : "integer" + description : "Program service revenue line 1d (exempt)" + mode : "nullable" + - name : "progsrvcecold" + type : "integer" + description : "Program service revenue line 1e (excluded)" + mode : "nullable" + - name : "progsrvcecole" + type : "integer" + description : "Program service revenue line 1e (exempt)" + mode : "nullable" + - name : "progsrvcfcold" + type : "integer" + description : "Program service revenue line 1f (excluded)" + mode : "nullable" + - name : "progsrvcfcole" + type : "integer" + description : "Program service revenue line 1f (exempt)" + mode : "nullable" + - name : "progsrvcgcold" + type : "integer" + description : "Program service revenue--fees and contracts from government line 1g (excluded)" + mode : "nullable" + - name : "progsrvcgcole" + type : "integer" + description : "Program service revenue--fees and contracts from government line 1g (exempt)" + mode : "nullable" + - name : "membershpduesd" + type : "integer" + description : "Membership dues and assessments (excluded)" + mode : "nullable" + - name : "membershpduese" + type : "integer" + description : "Membership dues and assessments (exempt)" + mode : "nullable" + - name : "intonsvngsd" + type : "integer" + description : "Interest on savings and temporary cash investments (excluded)" + mode : "nullable" + - name : "intonsvngse" + type : "integer" + description : "Interest on savings and temporary cash investments (exempt)" + mode : "nullable" + - name : "dvdndsintd" + type : "integer" + description : "Dividends and interest from securities (excluded)" + mode : "nullable" + - name : "dvdndsinte" + type : "integer" + description : "Dividends and interest from securities (exempt)" + mode : "nullable" + - name : "trnsfrcashcd" + type : "string" + description : "Transfer cash to noncharitable exempt organization?" + mode : "nullable" + - name : "trnsothasstscd" + type : "string" + description : "Transfer other assets to noncharitable exempt organization?" 
+ mode : "nullable" + - name : "salesasstscd" + type : "string" + description : "Sale of assets to noncharitable exempt organization?" + mode : "nullable" + - name : "prchsasstscd" + type : "string" + description : "Purchase of assets from noncharitable exempt organization?" + mode : "nullable" + - name : "rentlsfacltscd" + type : "string" + description : "Rental of facilities or other assets?" + mode : "nullable" + - name : "reimbrsmntscd" + type : "string" + description : "Reimbursements arrangements?" + mode : "nullable" + - name : "loansguarcd" + type : "string" + description : "Loans or other guarantees?" + mode : "nullable" + - name : "perfservicescd" + type : "string" + description : "Performance of services or membership or fundraising solicitations?" + mode : "nullable" + - name : "sharngasstscd" + type : "string" + description : "Sharing of facilities equipment mailing lists other assets or paid employees?" + mode : "nullable" + + + graph_paths: + - "irs_990_pf_2016_transform_csv >> load_irs_990_pf_2016_to_bq" + + + + + \ No newline at end of file From a8c0a906152c65c8d98e7848350cecec84d140f8 Mon Sep 17 00:00:00 2001 From: Dipannita Banerjee Date: Wed, 25 Aug 2021 15:24:57 +0000 Subject: [PATCH 2/4] feat: Onboard IRS 990 dataset --- datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py index a3466db9e..2bdbec198 100644 --- a/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py +++ b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py @@ -14,9 +14,7 @@ from airflow import DAG -from airflow.contrib.operators import gcs_to_bq -from airflow.contrib.operators import kubernetes_pod_operator - +from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator default_args = { "owner": "Google", From 777cb13736e3d937f9a462d4e45cbe341ff3e8e2 Mon Sep 17 00:00:00 2001 
From: Dipannita Banerjee Date: Thu, 26 Aug 2021 10:03:07 +0000 Subject: [PATCH 3/4] feat: Onboarding IRS 990 dataset --- .../run_csv_transform_kub/csv_transform.py | 37 +- .../run_csv_transform_kub/requirements.txt | 1 - datasets/irs_990/dataset.yaml | 4 +- datasets/irs_990/irs_990_2014/pipeline.yaml | 1474 ++++++++-------- datasets/irs_990/irs_990_2015/pipeline.yaml | 1480 ++++++++-------- datasets/irs_990/irs_990_2016/pipeline.yaml | 1485 ++++++++-------- datasets/irs_990/irs_990_2017/pipeline.yaml | 1487 ++++++++--------- .../irs_990/irs_990_ez_2014/pipeline.yaml | 435 +++-- .../irs_990/irs_990_ez_2015/pipeline.yaml | 441 +++-- .../irs_990/irs_990_ez_2016/pipeline.yaml | 441 +++-- .../irs_990_ez_2017/irs_990_ez_2017_dag.py | 2 +- .../irs_990/irs_990_ez_2017/pipeline.yaml | 441 +++-- .../irs_990_pf_2014/irs_990_pf_2014_dag.py | 2 +- .../irs_990/irs_990_pf_2014/pipeline.yaml | 1435 ++++++++-------- .../irs_990_pf_2015/irs_990_pf_2015_dag.py | 2 +- .../irs_990/irs_990_pf_2015/pipeline.yaml | 1443 ++++++++-------- .../irs_990_pf_2016/irs_990_pf_2016_dag.py | 2 +- .../irs_990/irs_990_pf_2016/pipeline.yaml | 1443 ++++++++-------- 18 files changed, 5998 insertions(+), 6057 deletions(-) diff --git a/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py b/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py index cbdbb8c0c..e9af82792 100644 --- a/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py +++ b/datasets/irs_990/_images/run_csv_transform_kub/csv_transform.py @@ -24,8 +24,6 @@ from urllib.parse import urlparse import pandas as pd - -# import numpy as np import requests from google.cloud import storage @@ -39,7 +37,7 @@ def main( headers: typing.List[str], rename_mappings: dict, pipeline_name: str, -): +) -> None: logging.info( f"irs 990 {pipeline_name} process started at " @@ -52,9 +50,7 @@ def main( logging.info(f"Downloading file from {source_url}... 
") download_file(source_url, source_file) - # open the input file - logging.info(f"Opening file {source_file}... ") - + logging.info(f"Opening file {source_file} ... ") str_value = os.path.basename(urlparse(source_url).path) if re.search("zip", str_value): @@ -64,22 +60,15 @@ def main( else: df = pd.read_csv(str(source_file), encoding="utf-8", sep=r"\s+") - # steps in the pipeline - logging.info(f"Transforming.. {source_file}") - - logging.info(f"Transform: Rename columns.. {source_file}") + logging.info(f"Transforming {source_file} ...") + logging.info(f"Transform: Rename columns {source_file} ...") rename_headers(df, rename_mappings) - logging.info(f"Transform: filtering null values.. {source_file}") - + logging.info(f"Transform: filtering null values {source_file} ...") filter_null_rows(df) - # logging.info("Transform: Converting to integr.. ") - - # df["totsupp509"] = df["totsupp509"].apply(convert_to_int) - - logging.info(f"Transform: converting to integer.. {source_file}") + logging.info(f"Transform: converting to integer {source_file} ...") if re.search("pf", pipeline_name): df.invstexcisetx = df.invstexcisetx.replace("N", 0) @@ -90,19 +79,17 @@ def main( df["totsupp509"] = df["totsupp509"].apply(convert_to_int) logging.info( - f"Transform: Reordering headers for.. {os.path.basename(urlparse(source_url).path)}" + f"Transform: Reordering headers for {os.path.basename(urlparse(source_url).path)} ..." ) df = df[headers] - # save to output file - logging.info(f"Saving to output file.. {target_file}") + logging.info(f"Saving to output file {target_file} ...") try: save_to_new_file(df, file_path=str(target_file)) except Exception as e: logging.error(f"Error saving output file: {e}.") - # upload to GCS logging.info( f"Uploading output file to.. 
gs://{target_gcs_bucket}/{target_gcs_path}" ) @@ -114,20 +101,20 @@ def main( ) -def rename_headers(df, rename_mappings): +def rename_headers(df: pd.DataFrame, rename_mappings: dict) -> None: df = df.rename(columns=rename_mappings, inplace=True) -def filter_null_rows(df): +def filter_null_rows(df: pd.DataFrame) -> None: df = df[df.ein != ""] -def save_to_new_file(df, file_path): +def save_to_new_file(df: pd.DataFrame, file_path: pathlib.Path) -> None: # df.export_csv(file_path) df.to_csv(file_path, index=False) -def download_file(source_url: str, source_file: pathlib.Path): +def download_file(source_url: str, source_file: pathlib.Path) -> None: logging.info(f"Downloading {source_url} into {source_file}") r = requests.get(source_url, stream=True) if r.status_code == 200: diff --git a/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt b/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt index ecd275f68..1c45cdfc3 100644 --- a/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt +++ b/datasets/irs_990/_images/run_csv_transform_kub/requirements.txt @@ -1,4 +1,3 @@ requests -vaex google-cloud-storage pandas diff --git a/datasets/irs_990/dataset.yaml b/datasets/irs_990/dataset.yaml index fb1e34f92..89fc44f5d 100644 --- a/datasets/irs_990/dataset.yaml +++ b/datasets/irs_990/dataset.yaml @@ -23,7 +23,7 @@ dataset: friendly_name: irs_990 # A short, descriptive summary of the dataset. - description: irs_990 based 2015 datasets + description: IRS 990 dataset # A list of sources the dataset is derived from, using the YAML list syntax. 
dataset_sources: ~ @@ -55,4 +55,4 @@ resources: # description (A user-friendly description of the dataset) # location (The geographic location where the dataset should reside) dataset_id: irs_990 - description: irs_990 + description: irs_990 diff --git a/datasets/irs_990/irs_990_2014/pipeline.yaml b/datasets/irs_990/irs_990_2014/pipeline.yaml index 25bffea7a..38b6b8bb7 100644 --- a/datasets/irs_990/irs_990_2014/pipeline.yaml +++ b/datasets/irs_990/irs_990_2014/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_2014 # Description of the table - description: "irs_990 2014 dataset" + description: "IRS 990 2014 dataset" dag: airflow_version: 1 @@ -106,741 +106,741 @@ dag: # i.e. specify modes for all columns. schema_fields: - - name : "ein" - type : "string" - mode : "required" - - name : "tax_pd" - type : "integer" - mode : "nullable" - - name : "subseccd" - type : "integer" - mode : "nullable" - - name : "s501c3or4947a1cd" - type : "string" - mode : "nullable" - - name : "schdbind" - type : "string" - mode : "nullable" - - name : "politicalactvtscd" - type : "string" - mode : "nullable" - - name : "lbbyingactvtscd" - type : "string" - mode : "nullable" - - name : "subjto6033cd" - type : "string" - mode : "nullable" - - name : "dnradvisedfundscd" - type : "string" - mode : "nullable" - - name : "prptyintrcvdcd" - type : "string" - mode : "nullable" - - name : "maintwrkofartcd" - type : "string" - mode : "nullable" - - name : "crcounselingqstncd" - type : "string" - mode : "nullable" - - name : "hldassetsintermpermcd" - type : "string" - mode : "nullable" - - name : "rptlndbldgeqptcd" - type : "string" - mode : "nullable" - - name : "rptinvstothsecd" - type : "string" - mode : "nullable" - - name : "rptinvstprgrelcd" - type : "string" - mode : "nullable" - - name : "rptothasstcd" - type : "string" - mode : "nullable" - - name : "rptothliabcd" - type : "string" - mode : "nullable" - - name : "sepcnsldtfinstmtcd" - type : "string" - mode : "nullable" - - name : 
"sepindaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "inclinfinstmtcd" - type : "string" - mode : "nullable" - - name : "operateschools170cd" - type : "string" - mode : "nullable" - - name : "frgnofficecd" - type : "string" - mode : "nullable" - - name : "frgnrevexpnscd" - type : "string" - mode : "nullable" - - name : "frgngrntscd" - type : "string" - mode : "nullable" - - name : "frgnaggragrntscd" - type : "string" - mode : "nullable" - - name : "rptprofndrsngfeescd" - type : "string" - mode : "nullable" - - name : "rptincfnndrsngcd" - type : "string" - mode : "nullable" - - name : "rptincgamingcd" - type : "string" - mode : "nullable" - - name : "operatehosptlcd" - type : "string" - mode : "nullable" - - name : "hospaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstogovtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstoindvcd" - type : "string" - mode : "nullable" - - name : "rptyestocompnstncd" - type : "string" - mode : "nullable" - - name : "txexmptbndcd" - type : "string" - mode : "nullable" - - name : "invstproceedscd" - type : "string" - mode : "nullable" - - name : "maintescrwaccntcd" - type : "string" - mode : "nullable" - - name : "actonbehalfcd" - type : "string" - mode : "nullable" - - name : "engageexcessbnftcd" - type : "string" - mode : "nullable" - - name : "awarexcessbnftcd" - type : "string" - mode : "nullable" - - name : "loantofficercd" - type : "string" - mode : "nullable" - - name : "grantoofficercd" - type : "string" - mode : "nullable" - - name : "dirbusnreltdcd" - type : "string" - mode : "nullable" - - name : "fmlybusnreltdcd" - type : "string" - mode : "nullable" - - name : "servasofficercd" - type : "string" - mode : "nullable" - - name : "recvnoncashcd" - type : "string" - mode : "nullable" - - name : "recvartcd" - type : "string" - mode : "nullable" - - name : "ceaseoperationscd" - type : "string" - mode : "nullable" - - name : "sellorexchcd" - type : "string" - mode : "nullable" 
- - name : "ownsepentcd" - type : "string" - mode : "nullable" - - name : "reltdorgcd" - type : "string" - mode : "nullable" - - name : "intincntrlcd" - type : "string" - mode : "nullable" - - name : "orgtrnsfrcd" - type : "string" - mode : "nullable" - - name : "conduct5percentcd" - type : "string" - mode : "nullable" - - name : "compltschocd" - type : "string" - mode : "nullable" - - name : "f1096cnt" - type : "integer" - mode : "nullable" - - name : "fw2gcnt" - type : "integer" - mode : "nullable" - - name : "wthldngrulescd" - type : "string" - mode : "nullable" - - name : "noemplyeesw3cnt" - type : "integer" - mode : "nullable" - - name : "filerqrdrtnscd" - type : "string" - mode : "nullable" - - name : "unrelbusinccd" - type : "string" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - mode : "nullable" - - name : "frgnacctcd" - type : "string" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" - mode : "nullable" - - name : "prtynotifyorgcd" - type : "string" - mode : "nullable" - - name : "filedf8886tcd" - type : "string" - mode : "nullable" - - name : "solicitcntrbcd" - type : "string" - mode : "nullable" - - name : "exprstmntcd" - type : "string" - mode : "nullable" - - name : "providegoodscd" - type : "string" - mode : "nullable" - - name : "notfydnrvalcd" - type : "string" - mode : "nullable" - - name : "filedf8282cd" - type : "string" - mode : "nullable" - - name : "f8282cnt" - type : "integer" - mode : "nullable" - - name : "fndsrcvdcd" - type : "string" - mode : "nullable" - - name : "premiumspaidcd" - type : "string" - mode : "nullable" - - name : "filedf8899cd" - type : "string" - mode : "nullable" - - name : "filedf1098ccd" - type : "string" - mode : "nullable" - - name : "excbushldngscd" - type : "string" - mode : "nullable" - - name : "s4966distribcd" - type : "string" - mode : "nullable" - - name : "distribtodonorcd" - type : "string" - mode : "nullable" - - name : "initiationfees" - type : "integer" - mode : 
"nullable" - - name : "grsrcptspublicuse" - type : "integer" - mode : "nullable" - - name : "grsincmembers" - type : "integer" - mode : "nullable" - - name : "grsincother" - type : "integer" - mode : "nullable" - - name : "filedlieuf1041cd" - type : "string" - mode : "nullable" - - name : "txexmptint" - type : "integer" - mode : "nullable" - - name : "qualhlthplncd" - type : "string" - mode : "nullable" - - name : "qualhlthreqmntn" - type : "integer" - mode : "nullable" - - name : "qualhlthonhnd" - type : "integer" - mode : "nullable" - - name : "rcvdpdtngcd" - type : "string" - mode : "nullable" - - name : "filedf720cd" - type : "string" - mode : "nullable" - - name : "totreprtabled" - type : "integer" - mode : "nullable" - - name : "totcomprelatede" - type : "integer" - mode : "nullable" - - name : "totestcompf" - type : "integer" - mode : "nullable" - - name : "noindiv100kcnt" - type : "integer" - mode : "nullable" - - name : "nocontractor100kcnt" - type : "integer" - mode : "nullable" - - name : "totcntrbgfts" - type : "integer" - mode : "nullable" - - name : "prgmservcode2acd" - type : "integer" - mode : "nullable" - - name : "totrev2acola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2bcd" - type : "integer" - mode : "nullable" - - name : "totrev2bcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ccd" - type : "integer" - mode : "nullable" - - name : "totrev2ccola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2dcd" - type : "integer" - mode : "nullable" - - name : "totrev2dcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ecd" - type : "integer" - mode : "nullable" - - name : "totrev2ecola" - type : "integer" - mode : "nullable" - - name : "totrev2fcola" - type : "integer" - mode : "nullable" - - name : "totprgmrevnue" - type : "integer" - mode : "nullable" - - name : "invstmntinc" - type : "integer" - mode : "nullable" - - name : "txexmptbndsproceeds" - type : "integer" - mode : 
"nullable" - - name : "royaltsinc" - type : "integer" - mode : "nullable" - - name : "grsrntsreal" - type : "integer" - mode : "nullable" - - name : "grsrntsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlexpnsreal" - type : "integer" - mode : "nullable" - - name : "rntlexpnsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlincreal" - type : "integer" - mode : "nullable" - - name : "rntlincprsnl" - type : "integer" - mode : "nullable" - - name : "netrntlinc" - type : "integer" - mode : "nullable" - - name : "grsalesecur" - type : "integer" - mode : "nullable" - - name : "grsalesothr" - type : "integer" - mode : "nullable" - - name : "cstbasisecur" - type : "integer" - mode : "nullable" - - name : "cstbasisothr" - type : "integer" - mode : "nullable" - - name : "gnlsecur" - type : "integer" - mode : "nullable" - - name : "gnlsothr" - type : "integer" - mode : "nullable" - - name : "netgnls" - type : "integer" - mode : "nullable" - - name : "grsincfndrsng" - type : "integer" - mode : "nullable" - - name : "lessdirfndrsng" - type : "integer" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" - mode : "nullable" - - name : "grsincgaming" - type : "integer" - mode : "nullable" - - name : "lessdirgaming" - type : "integer" - mode : "nullable" - - name : "netincgaming" - type : "integer" - mode : "nullable" - - name : "grsalesinvent" - type : "integer" - mode : "nullable" - - name : "lesscstofgoods" - type : "integer" - mode : "nullable" - - name : "netincsales" - type : "integer" - mode : "nullable" - - name : "miscrev11acd" - type : "integer" - mode : "nullable" - - name : "miscrevtota" - type : "integer" - mode : "nullable" - - name : "miscrev11bcd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11b" - type : "integer" - mode : "nullable" - - name : "miscrev11ccd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11c" - type : "integer" - mode : "nullable" - - name : "miscrevtot11d" - type : "integer" - 
mode : "nullable" - - name : "miscrevtot11e" - type : "integer" - mode : "nullable" - - name : "totrevenue" - type : "integer" - mode : "nullable" - - name : "grntstogovt" - type : "integer" - mode : "nullable" - - name : "grnsttoindiv" - type : "integer" - mode : "nullable" - - name : "grntstofrgngovt" - type : "integer" - mode : "nullable" - - name : "benifitsmembrs" - type : "integer" - mode : "nullable" - - name : "compnsatncurrofcr" - type : "integer" - mode : "nullable" - - name : "compnsatnandothr" - type : "integer" - mode : "nullable" - - name : "othrsalwages" - type : "integer" - mode : "nullable" - - name : "pensionplancontrb" - type : "integer" - mode : "nullable" - - name : "othremplyeebenef" - type : "integer" - mode : "nullable" - - name : "payrolltx" - type : "integer" - mode : "nullable" - - name : "feesforsrvcmgmt" - type : "integer" - mode : "nullable" - - name : "legalfees" - type : "integer" - mode : "nullable" - - name : "accntingfees" - type : "integer" - mode : "nullable" - - name : "feesforsrvclobby" - type : "integer" - mode : "nullable" - - name : "profndraising" - type : "integer" - mode : "nullable" - - name : "feesforsrvcinvstmgmt" - type : "integer" - mode : "nullable" - - name : "feesforsrvcothr" - type : "integer" - mode : "nullable" - - name : "advrtpromo" - type : "integer" - mode : "nullable" - - name : "officexpns" - type : "integer" - mode : "nullable" - - name : "infotech" - type : "integer" - mode : "nullable" - - name : "royaltsexpns" - type : "integer" - mode : "nullable" - - name : "occupancy" - type : "integer" - mode : "nullable" - - name : "travel" - type : "integer" - mode : "nullable" - - name : "travelofpublicoffcl" - type : "integer" - mode : "nullable" - - name : "converconventmtng" - type : "integer" - mode : "nullable" - - name : "interestamt" - type : "integer" - mode : "nullable" - - name : "pymtoaffiliates" - type : "integer" - mode : "nullable" - - name : "deprcatndepletn" - type : "integer" - mode : 
"nullable" - - name : "insurance" - type : "integer" - mode : "nullable" - - name : "othrexpnsa" - type : "integer" - mode : "nullable" - - name : "othrexpnsb" - type : "integer" - mode : "nullable" - - name : "othrexpnsc" - type : "integer" - mode : "nullable" - - name : "othrexpnsd" - type : "integer" - mode : "nullable" - - name : "othrexpnse" - type : "integer" - mode : "nullable" - - name : "othrexpnsf" - type : "integer" - mode : "nullable" - - name : "totfuncexpns" - type : "integer" - mode : "nullable" - - name : "nonintcashend" - type : "integer" - mode : "nullable" - - name : "svngstempinvend" - type : "integer" - mode : "nullable" - - name : "pldgegrntrcvblend" - type : "integer" - mode : "nullable" - - name : "accntsrcvblend" - type : "integer" - mode : "nullable" - - name : "currfrmrcvblend" - type : "integer" - mode : "nullable" - - name : "rcvbldisqualend" - type : "integer" - mode : "nullable" - - name : "notesloansrcvblend" - type : "integer" - mode : "nullable" - - name : "invntriesalesend" - type : "integer" - mode : "nullable" - - name : "prepaidexpnsend" - type : "integer" - mode : "nullable" - - name : "lndbldgsequipend" - type : "integer" - mode : "nullable" - - name : "invstmntsend" - type : "integer" - mode : "nullable" - - name : "invstmntsothrend" - type : "integer" - mode : "nullable" - - name : "invstmntsprgmend" - type : "integer" - mode : "nullable" - - name : "intangibleassetsend" - type : "integer" - mode : "nullable" - - name : "othrassetsend" - type : "integer" - mode : "nullable" - - name : "totassetsend" - type : "integer" - mode : "nullable" - - name : "accntspayableend" - type : "integer" - mode : "nullable" - - name : "grntspayableend" - type : "integer" - mode : "nullable" - - name : "deferedrevnuend" - type : "integer" - mode : "nullable" - - name : "txexmptbndsend" - type : "integer" - mode : "nullable" - - name : "escrwaccntliabend" - type : "integer" - mode : "nullable" - - name : "paybletoffcrsend" - type : "integer" - 
mode : "nullable" - - name : "secrdmrtgsend" - type : "integer" - mode : "nullable" - - name : "unsecurednotesend" - type : "integer" - mode : "nullable" - - name : "othrliabend" - type : "integer" - mode : "nullable" - - name : "totliabend" - type : "integer" - mode : "nullable" - - name : "unrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "temprstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "permrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "capitalstktrstend" - type : "integer" - mode : "nullable" - - name : "paidinsurplusend" - type : "integer" - mode : "nullable" - - name : "retainedearnend" - type : "integer" - mode : "nullable" - - name : "totnetassetend" - type : "integer" - mode : "nullable" - - name : "totnetliabastend" - type : "integer" - mode : "nullable" - - name : "nonpfrea" - type : "integer" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" - mode : "nullable" - - name : "totsupport" - type : "integer" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" - mode : "nullable" - - name : "srvcsval170" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "grsinc170" - type : "integer" - mode : "nullable" - - name : "netincunreltd170" - type : "integer" - mode : "nullable" - - name : "othrinc170" - type : "integer" - mode : "nullable" - - name : "totsupp170" - type : "integer" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" - mode : "nullable" - - name : 
"grsrcptsactivities509" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" - mode : "nullable" - - name : "srvcsval509" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" - mode : "nullable" - - name : "exceeds1pct509" - type : "integer" - mode : "nullable" - - name : "subtotpub509" - type : "integer" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "grsinc509" - type : "integer" - mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" - mode : "nullable" - - name : "othrinc509" - type : "integer" - mode : "nullable" - - name : "totsupp509" - type : "integer" - mode : "nullable" + - name: "ein" + type: "string" + mode: "required" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + 
mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: "frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" 
+ - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: "nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: 
"initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: "nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: "nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" 
+ mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: "integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - 
name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: "integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: 
"nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" + mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: 
"nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + 
type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" graph_paths: - - "irs_990_transform_csv >> load_irs_990_to_bq" \ No newline at end of file + - "irs_990_transform_csv >> load_irs_990_to_bq" diff --git a/datasets/irs_990/irs_990_2015/pipeline.yaml b/datasets/irs_990/irs_990_2015/pipeline.yaml index 2225651d1..d12a83ccc 100644 --- a/datasets/irs_990/irs_990_2015/pipeline.yaml +++ b/datasets/irs_990/irs_990_2015/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_2015 # Description of the table - description: "irs_990 2015 dataset" + description: "IRS 990 2015 dataset" dag: airflow_version: 1 @@ -104,744 +104,744 @@ dag: # Always use snake_case and lowercase for column names, and be explicit, # i.e. specify modes for all columns. 
schema_fields: - - name : "ein" - type : "string" - mode : "required" - - name : "elf" - type : "string" - mode : "nullable" - - name : "tax_pd" - type : "integer" - mode : "nullable" - - name : "subseccd" - type : "integer" - mode : "nullable" - - name : "s501c3or4947a1cd" - type : "string" - mode : "nullable" - - name : "schdbind" - type : "string" - mode : "nullable" - - name : "politicalactvtscd" - type : "string" - mode : "nullable" - - name : "lbbyingactvtscd" - type : "string" - mode : "nullable" - - name : "subjto6033cd" - type : "string" - mode : "nullable" - - name : "dnradvisedfundscd" - type : "string" - mode : "nullable" - - name : "prptyintrcvdcd" - type : "string" - mode : "nullable" - - name : "maintwrkofartcd" - type : "string" - mode : "nullable" - - name : "crcounselingqstncd" - type : "string" - mode : "nullable" - - name : "hldassetsintermpermcd" - type : "string" - mode : "nullable" - - name : "rptlndbldgeqptcd" - type : "string" - mode : "nullable" - - name : "rptinvstothsecd" - type : "string" - mode : "nullable" - - name : "rptinvstprgrelcd" - type : "string" - mode : "nullable" - - name : "rptothasstcd" - type : "string" - mode : "nullable" - - name : "rptothliabcd" - type : "string" - mode : "nullable" - - name : "sepcnsldtfinstmtcd" - type : "string" - mode : "nullable" - - name : "sepindaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "inclinfinstmtcd" - type : "string" - mode : "nullable" - - name : "operateschools170cd" - type : "string" - mode : "nullable" - - name : "frgnofficecd" - type : "string" - mode : "nullable" - - name : "frgnrevexpnscd" - type : "string" - mode : "nullable" - - name : "frgngrntscd" - type : "string" - mode : "nullable" - - name : "frgnaggragrntscd" - type : "string" - mode : "nullable" - - name : "rptprofndrsngfeescd" - type : "string" - mode : "nullable" - - name : "rptincfnndrsngcd" - type : "string" - mode : "nullable" - - name : "rptincgamingcd" - type : "string" - mode : "nullable" - - 
name : "operatehosptlcd" - type : "string" - mode : "nullable" - - name : "hospaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstogovtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstoindvcd" - type : "string" - mode : "nullable" - - name : "rptyestocompnstncd" - type : "string" - mode : "nullable" - - name : "txexmptbndcd" - type : "string" - mode : "nullable" - - name : "invstproceedscd" - type : "string" - mode : "nullable" - - name : "maintescrwaccntcd" - type : "string" - mode : "nullable" - - name : "actonbehalfcd" - type : "string" - mode : "nullable" - - name : "engageexcessbnftcd" - type : "string" - mode : "nullable" - - name : "awarexcessbnftcd" - type : "string" - mode : "nullable" - - name : "loantofficercd" - type : "string" - mode : "nullable" - - name : "grantoofficercd" - type : "string" - mode : "nullable" - - name : "dirbusnreltdcd" - type : "string" - mode : "nullable" - - name : "fmlybusnreltdcd" - type : "string" - mode : "nullable" - - name : "servasofficercd" - type : "string" - mode : "nullable" - - name : "recvnoncashcd" - type : "string" - mode : "nullable" - - name : "recvartcd" - type : "string" - mode : "nullable" - - name : "ceaseoperationscd" - type : "string" - mode : "nullable" - - name : "sellorexchcd" - type : "string" - mode : "nullable" - - name : "ownsepentcd" - type : "string" - mode : "nullable" - - name : "reltdorgcd" - type : "string" - mode : "nullable" - - name : "intincntrlcd" - type : "string" - mode : "nullable" - - name : "orgtrnsfrcd" - type : "string" - mode : "nullable" - - name : "conduct5percentcd" - type : "string" - mode : "nullable" - - name : "compltschocd" - type : "string" - mode : "nullable" - - name : "f1096cnt" - type : "integer" - mode : "nullable" - - name : "fw2gcnt" - type : "integer" - mode : "nullable" - - name : "wthldngrulescd" - type : "string" - mode : "nullable" - - name : "noemplyeesw3cnt" - type : "integer" - mode : "nullable" - - name : "filerqrdrtnscd" 
- type : "string" - mode : "nullable" - - name : "unrelbusinccd" - type : "string" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - mode : "nullable" - - name : "frgnacctcd" - type : "string" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" - mode : "nullable" - - name : "prtynotifyorgcd" - type : "string" - mode : "nullable" - - name : "filedf8886tcd" - type : "string" - mode : "nullable" - - name : "solicitcntrbcd" - type : "string" - mode : "nullable" - - name : "exprstmntcd" - type : "string" - mode : "nullable" - - name : "providegoodscd" - type : "string" - mode : "nullable" - - name : "notfydnrvalcd" - type : "string" - mode : "nullable" - - name : "filedf8282cd" - type : "string" - mode : "nullable" - - name : "f8282cnt" - type : "integer" - mode : "nullable" - - name : "fndsrcvdcd" - type : "string" - mode : "nullable" - - name : "premiumspaidcd" - type : "string" - mode : "nullable" - - name : "filedf8899cd" - type : "string" - mode : "nullable" - - name : "filedf1098ccd" - type : "string" - mode : "nullable" - - name : "excbushldngscd" - type : "string" - mode : "nullable" - - name : "s4966distribcd" - type : "string" - mode : "nullable" - - name : "distribtodonorcd" - type : "string" - mode : "nullable" - - name : "initiationfees" - type : "integer" - mode : "nullable" - - name : "grsrcptspublicuse" - type : "integer" - mode : "nullable" - - name : "grsincmembers" - type : "integer" - mode : "nullable" - - name : "grsincother" - type : "integer" - mode : "nullable" - - name : "filedlieuf1041cd" - type : "string" - mode : "nullable" - - name : "txexmptint" - type : "integer" - mode : "nullable" - - name : "qualhlthplncd" - type : "string" - mode : "nullable" - - name : "qualhlthreqmntn" - type : "integer" - mode : "nullable" - - name : "qualhlthonhnd" - type : "integer" - mode : "nullable" - - name : "rcvdpdtngcd" - type : "string" - mode : "nullable" - - name : "filedf720cd" - type : "string" - mode : "nullable" - - 
name : "totreprtabled" - type : "integer" - mode : "nullable" - - name : "totcomprelatede" - type : "integer" - mode : "nullable" - - name : "totestcompf" - type : "integer" - mode : "nullable" - - name : "noindiv100kcnt" - type : "integer" - mode : "nullable" - - name : "nocontractor100kcnt" - type : "integer" - mode : "nullable" - - name : "totcntrbgfts" - type : "integer" - mode : "nullable" - - name : "prgmservcode2acd" - type : "integer" - mode : "nullable" - - name : "totrev2acola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2bcd" - type : "integer" - mode : "nullable" - - name : "totrev2bcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ccd" - type : "integer" - mode : "nullable" - - name : "totrev2ccola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2dcd" - type : "integer" - mode : "nullable" - - name : "totrev2dcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ecd" - type : "integer" - mode : "nullable" - - name : "totrev2ecola" - type : "integer" - mode : "nullable" - - name : "totrev2fcola" - type : "integer" - mode : "nullable" - - name : "totprgmrevnue" - type : "integer" - mode : "nullable" - - name : "invstmntinc" - type : "integer" - mode : "nullable" - - name : "txexmptbndsproceeds" - type : "integer" - mode : "nullable" - - name : "royaltsinc" - type : "integer" - mode : "nullable" - - name : "grsrntsreal" - type : "integer" - mode : "nullable" - - name : "grsrntsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlexpnsreal" - type : "integer" - mode : "nullable" - - name : "rntlexpnsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlincreal" - type : "integer" - mode : "nullable" - - name : "rntlincprsnl" - type : "integer" - mode : "nullable" - - name : "netrntlinc" - type : "integer" - mode : "nullable" - - name : "grsalesecur" - type : "integer" - mode : "nullable" - - name : "grsalesothr" - type : "integer" - mode : "nullable" - - name : 
"cstbasisecur" - type : "integer" - mode : "nullable" - - name : "cstbasisothr" - type : "integer" - mode : "nullable" - - name : "gnlsecur" - type : "integer" - mode : "nullable" - - name : "gnlsothr" - type : "integer" - mode : "nullable" - - name : "netgnls" - type : "integer" - mode : "nullable" - - name : "grsincfndrsng" - type : "integer" - mode : "nullable" - - name : "lessdirfndrsng" - type : "integer" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" - mode : "nullable" - - name : "grsincgaming" - type : "integer" - mode : "nullable" - - name : "lessdirgaming" - type : "integer" - mode : "nullable" - - name : "netincgaming" - type : "integer" - mode : "nullable" - - name : "grsalesinvent" - type : "integer" - mode : "nullable" - - name : "lesscstofgoods" - type : "integer" - mode : "nullable" - - name : "netincsales" - type : "integer" - mode : "nullable" - - name : "miscrev11acd" - type : "integer" - mode : "nullable" - - name : "miscrevtota" - type : "integer" - mode : "nullable" - - name : "miscrev11bcd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11b" - type : "integer" - mode : "nullable" - - name : "miscrev11ccd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11c" - type : "integer" - mode : "nullable" - - name : "miscrevtot11d" - type : "integer" - mode : "nullable" - - name : "miscrevtot11e" - type : "integer" - mode : "nullable" - - name : "totrevenue" - type : "integer" - mode : "nullable" - - name : "grntstogovt" - type : "integer" - mode : "nullable" - - name : "grnsttoindiv" - type : "integer" - mode : "nullable" - - name : "grntstofrgngovt" - type : "integer" - mode : "nullable" - - name : "benifitsmembrs" - type : "integer" - mode : "nullable" - - name : "compnsatncurrofcr" - type : "integer" - mode : "nullable" - - name : "compnsatnandothr" - type : "integer" - mode : "nullable" - - name : "othrsalwages" - type : "integer" - mode : "nullable" - - name : "pensionplancontrb" - type : "integer" - 
mode : "nullable" - - name : "othremplyeebenef" - type : "integer" - mode : "nullable" - - name : "payrolltx" - type : "integer" - mode : "nullable" - - name : "feesforsrvcmgmt" - type : "integer" - mode : "nullable" - - name : "legalfees" - type : "integer" - mode : "nullable" - - name : "accntingfees" - type : "integer" - mode : "nullable" - - name : "feesforsrvclobby" - type : "integer" - mode : "nullable" - - name : "profndraising" - type : "integer" - mode : "nullable" - - name : "feesforsrvcinvstmgmt" - type : "integer" - mode : "nullable" - - name : "feesforsrvcothr" - type : "integer" - mode : "nullable" - - name : "advrtpromo" - type : "integer" - mode : "nullable" - - name : "officexpns" - type : "integer" - mode : "nullable" - - name : "infotech" - type : "integer" - mode : "nullable" - - name : "royaltsexpns" - type : "integer" - mode : "nullable" - - name : "occupancy" - type : "integer" - mode : "nullable" - - name : "travel" - type : "integer" - mode : "nullable" - - name : "travelofpublicoffcl" - type : "integer" - mode : "nullable" - - name : "converconventmtng" - type : "integer" - mode : "nullable" - - name : "interestamt" - type : "integer" - mode : "nullable" - - name : "pymtoaffiliates" - type : "integer" - mode : "nullable" - - name : "deprcatndepletn" - type : "integer" - mode : "nullable" - - name : "insurance" - type : "integer" - mode : "nullable" - - name : "othrexpnsa" - type : "integer" - mode : "nullable" - - name : "othrexpnsb" - type : "integer" - mode : "nullable" - - name : "othrexpnsc" - type : "integer" - mode : "nullable" - - name : "othrexpnsd" - type : "integer" - mode : "nullable" - - name : "othrexpnse" - type : "integer" - mode : "nullable" - - name : "othrexpnsf" - type : "integer" - mode : "nullable" - - name : "totfuncexpns" - type : "integer" - mode : "nullable" - - name : "nonintcashend" - type : "integer" - mode : "nullable" - - name : "svngstempinvend" - type : "integer" - mode : "nullable" - - name : 
"pldgegrntrcvblend" - type : "integer" - mode : "nullable" - - name : "accntsrcvblend" - type : "integer" - mode : "nullable" - - name : "currfrmrcvblend" - type : "integer" - mode : "nullable" - - name : "rcvbldisqualend" - type : "integer" - mode : "nullable" - - name : "notesloansrcvblend" - type : "integer" - mode : "nullable" - - name : "invntriesalesend" - type : "integer" - mode : "nullable" - - name : "prepaidexpnsend" - type : "integer" - mode : "nullable" - - name : "lndbldgsequipend" - type : "integer" - mode : "nullable" - - name : "invstmntsend" - type : "integer" - mode : "nullable" - - name : "invstmntsothrend" - type : "integer" - mode : "nullable" - - name : "invstmntsprgmend" - type : "integer" - mode : "nullable" - - name : "intangibleassetsend" - type : "integer" - mode : "nullable" - - name : "othrassetsend" - type : "integer" - mode : "nullable" - - name : "totassetsend" - type : "integer" - mode : "nullable" - - name : "accntspayableend" - type : "integer" - mode : "nullable" - - name : "grntspayableend" - type : "integer" - mode : "nullable" - - name : "deferedrevnuend" - type : "integer" - mode : "nullable" - - name : "txexmptbndsend" - type : "integer" - mode : "nullable" - - name : "escrwaccntliabend" - type : "integer" - mode : "nullable" - - name : "paybletoffcrsend" - type : "integer" - mode : "nullable" - - name : "secrdmrtgsend" - type : "integer" - mode : "nullable" - - name : "unsecurednotesend" - type : "integer" - mode : "nullable" - - name : "othrliabend" - type : "integer" - mode : "nullable" - - name : "totliabend" - type : "integer" - mode : "nullable" - - name : "unrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "temprstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "permrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "capitalstktrstend" - type : "integer" - mode : "nullable" - - name : "paidinsurplusend" - type : "integer" - mode : "nullable" - - name : 
"retainedearnend" - type : "integer" - mode : "nullable" - - name : "totnetassetend" - type : "integer" - mode : "nullable" - - name : "totnetliabastend" - type : "integer" - mode : "nullable" - - name : "nonpfrea" - type : "integer" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" - mode : "nullable" - - name : "totsupport" - type : "integer" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" - mode : "nullable" - - name : "srvcsval170" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "grsinc170" - type : "integer" - mode : "nullable" - - name : "netincunreltd170" - type : "integer" - mode : "nullable" - - name : "othrinc170" - type : "integer" - mode : "nullable" - - name : "totsupp170" - type : "integer" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" - mode : "nullable" - - name : "grsrcptsactivities509" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" - mode : "nullable" - - name : "srvcsval509" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" - mode : "nullable" - - name : "exceeds1pct509" - type : "integer" - mode : "nullable" - - name : "subtotpub509" - type : "integer" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "grsinc509" - type : "integer" 
- mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" - mode : "nullable" - - name : "othrinc509" - type : "integer" - mode : "nullable" - - name : "totsupp509" - type : "integer" - mode : "nullable" + - name: "ein" + type: "string" + mode: "required" + - name: "elf" + type: "string" + mode: "nullable" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: 
"frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: 
"nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: "nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: 
"nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: 
"integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: 
"integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" + mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + 
type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - 
name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" graph_paths: - - "irs_990_transform_csv >> 
load_irs_990_to_bq" \ No newline at end of file + - "irs_990_transform_csv >> load_irs_990_to_bq" diff --git a/datasets/irs_990/irs_990_2016/pipeline.yaml b/datasets/irs_990/irs_990_2016/pipeline.yaml index 28eb24828..04a24c38f 100644 --- a/datasets/irs_990/irs_990_2016/pipeline.yaml +++ b/datasets/irs_990/irs_990_2016/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_2016 # Description of the table - description: "irs_990_2016 dataset" + description: "IRS 990 2016 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -105,749 +105,744 @@ dag: # Always use snake_case and lowercase for column names, and be explicit, # i.e. specify modes for all columns. schema_fields: - - name : "ein" - type : "string" - mode : "required" - - name : "elf" - type : "string" - mode : "nullable" - - name : "tax_pd" - type : "integer" - mode : "nullable" - - name : "subseccd" - type : "integer" - mode : "nullable" - - name : "s501c3or4947a1cd" - type : "string" - mode : "nullable" - - name : "schdbind" - type : "string" - mode : "nullable" - - name : "politicalactvtscd" - type : "string" - mode : "nullable" - - name : "lbbyingactvtscd" - type : "string" - mode : "nullable" - - name : "subjto6033cd" - type : "string" - mode : "nullable" - - name : "dnradvisedfundscd" - type : "string" - mode : "nullable" - - name : "prptyintrcvdcd" - type : "string" - mode : "nullable" - - name : "maintwrkofartcd" - type : "string" - mode : "nullable" - - name : "crcounselingqstncd" - type : "string" - mode : "nullable" - - name : "hldassetsintermpermcd" - type : "string" - mode : "nullable" - - name : "rptlndbldgeqptcd" - type : "string" - mode : "nullable" - - name : "rptinvstothsecd" - type : "string" - mode : "nullable" - - name : "rptinvstprgrelcd" - type : "string" - mode : "nullable" - - name : "rptothasstcd" - type : 
"string" - mode : "nullable" - - name : "rptothliabcd" - type : "string" - mode : "nullable" - - name : "sepcnsldtfinstmtcd" - type : "string" - mode : "nullable" - - name : "sepindaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "inclinfinstmtcd" - type : "string" - mode : "nullable" - - name : "operateschools170cd" - type : "string" - mode : "nullable" - - name : "frgnofficecd" - type : "string" - mode : "nullable" - - name : "frgnrevexpnscd" - type : "string" - mode : "nullable" - - name : "frgngrntscd" - type : "string" - mode : "nullable" - - name : "frgnaggragrntscd" - type : "string" - mode : "nullable" - - name : "rptprofndrsngfeescd" - type : "string" - mode : "nullable" - - name : "rptincfnndrsngcd" - type : "string" - mode : "nullable" - - name : "rptincgamingcd" - type : "string" - mode : "nullable" - - name : "operatehosptlcd" - type : "string" - mode : "nullable" - - name : "hospaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstogovtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstoindvcd" - type : "string" - mode : "nullable" - - name : "rptyestocompnstncd" - type : "string" - mode : "nullable" - - name : "txexmptbndcd" - type : "string" - mode : "nullable" - - name : "invstproceedscd" - type : "string" - mode : "nullable" - - name : "maintescrwaccntcd" - type : "string" - mode : "nullable" - - name : "actonbehalfcd" - type : "string" - mode : "nullable" - - name : "engageexcessbnftcd" - type : "string" - mode : "nullable" - - name : "awarexcessbnftcd" - type : "string" - mode : "nullable" - - name : "loantofficercd" - type : "string" - mode : "nullable" - - name : "grantoofficercd" - type : "string" - mode : "nullable" - - name : "dirbusnreltdcd" - type : "string" - mode : "nullable" - - name : "fmlybusnreltdcd" - type : "string" - mode : "nullable" - - name : "servasofficercd" - type : "string" - mode : "nullable" - - name : "recvnoncashcd" - type : "string" - mode : "nullable" - - name : 
"recvartcd" - type : "string" - mode : "nullable" - - name : "ceaseoperationscd" - type : "string" - mode : "nullable" - - name : "sellorexchcd" - type : "string" - mode : "nullable" - - name : "ownsepentcd" - type : "string" - mode : "nullable" - - name : "reltdorgcd" - type : "string" - mode : "nullable" - - name : "intincntrlcd" - type : "string" - mode : "nullable" - - name : "orgtrnsfrcd" - type : "string" - mode : "nullable" - - name : "conduct5percentcd" - type : "string" - mode : "nullable" - - name : "compltschocd" - type : "string" - mode : "nullable" - - name : "f1096cnt" - type : "integer" - mode : "nullable" - - name : "fw2gcnt" - type : "integer" - mode : "nullable" - - name : "wthldngrulescd" - type : "string" - mode : "nullable" - - name : "noemplyeesw3cnt" - type : "integer" - mode : "nullable" - - name : "filerqrdrtnscd" - type : "string" - mode : "nullable" - - name : "unrelbusinccd" - type : "string" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - mode : "nullable" - - name : "frgnacctcd" - type : "string" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" - mode : "nullable" - - name : "prtynotifyorgcd" - type : "string" - mode : "nullable" - - name : "filedf8886tcd" - type : "string" - mode : "nullable" - - name : "solicitcntrbcd" - type : "string" - mode : "nullable" - - name : "exprstmntcd" - type : "string" - mode : "nullable" - - name : "providegoodscd" - type : "string" - mode : "nullable" - - name : "notfydnrvalcd" - type : "string" - mode : "nullable" - - name : "filedf8282cd" - type : "string" - mode : "nullable" - - name : "f8282cnt" - type : "integer" - mode : "nullable" - - name : "fndsrcvdcd" - type : "string" - mode : "nullable" - - name : "premiumspaidcd" - type : "string" - mode : "nullable" - - name : "filedf8899cd" - type : "string" - mode : "nullable" - - name : "filedf1098ccd" - type : "string" - mode : "nullable" - - name : "excbushldngscd" - type : "string" - mode : "nullable" - - name : 
"s4966distribcd" - type : "string" - mode : "nullable" - - name : "distribtodonorcd" - type : "string" - mode : "nullable" - - name : "initiationfees" - type : "integer" - mode : "nullable" - - name : "grsrcptspublicuse" - type : "integer" - mode : "nullable" - - name : "grsincmembers" - type : "integer" - mode : "nullable" - - name : "grsincother" - type : "integer" - mode : "nullable" - - name : "filedlieuf1041cd" - type : "string" - mode : "nullable" - - name : "txexmptint" - type : "integer" - mode : "nullable" - - name : "qualhlthplncd" - type : "string" - mode : "nullable" - - name : "qualhlthreqmntn" - type : "integer" - mode : "nullable" - - name : "qualhlthonhnd" - type : "integer" - mode : "nullable" - - name : "rcvdpdtngcd" - type : "string" - mode : "nullable" - - name : "filedf720cd" - type : "string" - mode : "nullable" - - name : "totreprtabled" - type : "integer" - mode : "nullable" - - name : "totcomprelatede" - type : "integer" - mode : "nullable" - - name : "totestcompf" - type : "integer" - mode : "nullable" - - name : "noindiv100kcnt" - type : "integer" - mode : "nullable" - - name : "nocontractor100kcnt" - type : "integer" - mode : "nullable" - - name : "totcntrbgfts" - type : "integer" - mode : "nullable" - - name : "prgmservcode2acd" - type : "integer" - mode : "nullable" - - name : "totrev2acola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2bcd" - type : "integer" - mode : "nullable" - - name : "totrev2bcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ccd" - type : "integer" - mode : "nullable" - - name : "totrev2ccola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2dcd" - type : "integer" - mode : "nullable" - - name : "totrev2dcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ecd" - type : "integer" - mode : "nullable" - - name : "totrev2ecola" - type : "integer" - mode : "nullable" - - name : "totrev2fcola" - type : "integer" - mode : "nullable" - - name : 
"totprgmrevnue" - type : "integer" - mode : "nullable" - - name : "invstmntinc" - type : "integer" - mode : "nullable" - - name : "txexmptbndsproceeds" - type : "integer" - mode : "nullable" - - name : "royaltsinc" - type : "integer" - mode : "nullable" - - name : "grsrntsreal" - type : "integer" - mode : "nullable" - - name : "grsrntsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlexpnsreal" - type : "integer" - mode : "nullable" - - name : "rntlexpnsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlincreal" - type : "integer" - mode : "nullable" - - name : "rntlincprsnl" - type : "integer" - mode : "nullable" - - name : "netrntlinc" - type : "integer" - mode : "nullable" - - name : "grsalesecur" - type : "integer" - mode : "nullable" - - name : "grsalesothr" - type : "integer" - mode : "nullable" - - name : "cstbasisecur" - type : "integer" - mode : "nullable" - - name : "cstbasisothr" - type : "integer" - mode : "nullable" - - name : "gnlsecur" - type : "integer" - mode : "nullable" - - name : "gnlsothr" - type : "integer" - mode : "nullable" - - name : "netgnls" - type : "integer" - mode : "nullable" - - name : "grsincfndrsng" - type : "integer" - mode : "nullable" - - name : "lessdirfndrsng" - type : "integer" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" - mode : "nullable" - - name : "grsincgaming" - type : "integer" - mode : "nullable" - - name : "lessdirgaming" - type : "integer" - mode : "nullable" - - name : "netincgaming" - type : "integer" - mode : "nullable" - - name : "grsalesinvent" - type : "integer" - mode : "nullable" - - name : "lesscstofgoods" - type : "integer" - mode : "nullable" - - name : "netincsales" - type : "integer" - mode : "nullable" - - name : "miscrev11acd" - type : "integer" - mode : "nullable" - - name : "miscrevtota" - type : "integer" - mode : "nullable" - - name : "miscrev11bcd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11b" - type : "integer" - mode : "nullable" 
- - name : "miscrev11ccd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11c" - type : "integer" - mode : "nullable" - - name : "miscrevtot11d" - type : "integer" - mode : "nullable" - - name : "miscrevtot11e" - type : "integer" - mode : "nullable" - - name : "totrevenue" - type : "integer" - mode : "nullable" - - name : "grntstogovt" - type : "integer" - mode : "nullable" - - name : "grnsttoindiv" - type : "integer" - mode : "nullable" - - name : "grntstofrgngovt" - type : "integer" - mode : "nullable" - - name : "benifitsmembrs" - type : "integer" - mode : "nullable" - - name : "compnsatncurrofcr" - type : "integer" - mode : "nullable" - - name : "compnsatnandothr" - type : "integer" - mode : "nullable" - - name : "othrsalwages" - type : "integer" - mode : "nullable" - - name : "pensionplancontrb" - type : "integer" - mode : "nullable" - - name : "othremplyeebenef" - type : "integer" - mode : "nullable" - - name : "payrolltx" - type : "integer" - mode : "nullable" - - name : "feesforsrvcmgmt" - type : "integer" - mode : "nullable" - - name : "legalfees" - type : "integer" - mode : "nullable" - - name : "accntingfees" - type : "integer" - mode : "nullable" - - name : "feesforsrvclobby" - type : "integer" - mode : "nullable" - - name : "profndraising" - type : "integer" - mode : "nullable" - - name : "feesforsrvcinvstmgmt" - type : "integer" - mode : "nullable" - - name : "feesforsrvcothr" - type : "integer" - mode : "nullable" - - name : "advrtpromo" - type : "integer" - mode : "nullable" - - name : "officexpns" - type : "integer" - mode : "nullable" - - name : "infotech" - type : "integer" - mode : "nullable" - - name : "royaltsexpns" - type : "integer" - mode : "nullable" - - name : "occupancy" - type : "integer" - mode : "nullable" - - name : "travel" - type : "integer" - mode : "nullable" - - name : "travelofpublicoffcl" - type : "integer" - mode : "nullable" - - name : "converconventmtng" - type : "integer" - mode : "nullable" - - name : 
"interestamt" - type : "integer" - mode : "nullable" - - name : "pymtoaffiliates" - type : "integer" - mode : "nullable" - - name : "deprcatndepletn" - type : "integer" - mode : "nullable" - - name : "insurance" - type : "integer" - mode : "nullable" - - name : "othrexpnsa" - type : "integer" - mode : "nullable" - - name : "othrexpnsb" - type : "integer" - mode : "nullable" - - name : "othrexpnsc" - type : "integer" - mode : "nullable" - - name : "othrexpnsd" - type : "integer" - mode : "nullable" - - name : "othrexpnse" - type : "integer" - mode : "nullable" - - name : "othrexpnsf" - type : "integer" - mode : "nullable" - - name : "totfuncexpns" - type : "integer" - mode : "nullable" - - name : "nonintcashend" - type : "integer" - mode : "nullable" - - name : "svngstempinvend" - type : "integer" - mode : "nullable" - - name : "pldgegrntrcvblend" - type : "integer" - mode : "nullable" - - name : "accntsrcvblend" - type : "integer" - mode : "nullable" - - name : "currfrmrcvblend" - type : "integer" - mode : "nullable" - - name : "rcvbldisqualend" - type : "integer" - mode : "nullable" - - name : "notesloansrcvblend" - type : "integer" - mode : "nullable" - - name : "invntriesalesend" - type : "integer" - mode : "nullable" - - name : "prepaidexpnsend" - type : "integer" - mode : "nullable" - - name : "lndbldgsequipend" - type : "integer" - mode : "nullable" - - name : "invstmntsend" - type : "integer" - mode : "nullable" - - name : "invstmntsothrend" - type : "integer" - mode : "nullable" - - name : "invstmntsprgmend" - type : "integer" - mode : "nullable" - - name : "intangibleassetsend" - type : "integer" - mode : "nullable" - - name : "othrassetsend" - type : "integer" - mode : "nullable" - - name : "totassetsend" - type : "integer" - mode : "nullable" - - name : "accntspayableend" - type : "integer" - mode : "nullable" - - name : "grntspayableend" - type : "integer" - mode : "nullable" - - name : "deferedrevnuend" - type : "integer" - mode : "nullable" - - name : 
"txexmptbndsend" - type : "integer" - mode : "nullable" - - name : "escrwaccntliabend" - type : "integer" - mode : "nullable" - - name : "paybletoffcrsend" - type : "integer" - mode : "nullable" - - name : "secrdmrtgsend" - type : "integer" - mode : "nullable" - - name : "unsecurednotesend" - type : "integer" - mode : "nullable" - - name : "othrliabend" - type : "integer" - mode : "nullable" - - name : "totliabend" - type : "integer" - mode : "nullable" - - name : "unrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "temprstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "permrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "capitalstktrstend" - type : "integer" - mode : "nullable" - - name : "paidinsurplusend" - type : "integer" - mode : "nullable" - - name : "retainedearnend" - type : "integer" - mode : "nullable" - - name : "totnetassetend" - type : "integer" - mode : "nullable" - - name : "totnetliabastend" - type : "integer" - mode : "nullable" - - name : "nonpfrea" - type : "integer" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" - mode : "nullable" - - name : "totsupport" - type : "integer" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" - mode : "nullable" - - name : "srvcsval170" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "grsinc170" - type : "integer" - mode : "nullable" - - name : "netincunreltd170" - type : "integer" - mode : "nullable" - - name : "othrinc170" - type : "integer" - mode : "nullable" - - name : "totsupp170" - type : "integer" - mode : "nullable" - - name : "grsrcptsrelated170" - type : 
"integer" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" - mode : "nullable" - - name : "grsrcptsactivities509" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" - mode : "nullable" - - name : "srvcsval509" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" - mode : "nullable" - - name : "exceeds1pct509" - type : "integer" - mode : "nullable" - - name : "subtotpub509" - type : "integer" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "grsinc509" - type : "integer" - mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" - mode : "nullable" - - name : "othrinc509" - type : "integer" - mode : "nullable" - - name : "totsupp509" - type : "integer" - mode : "nullable" + - name: "ein" + type: "string" + mode: "required" + - name: "elf" + type: "string" + mode: "nullable" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: 
"hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: "frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: 
"fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: "nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: 
"string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: "nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: "nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - 
name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: "integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: 
"miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: "integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: 
"nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" + mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: 
"unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: 
"nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" graph_paths: - "irs_990_2016_transform_csv >> load_irs_990_2016_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_2017/pipeline.yaml b/datasets/irs_990/irs_990_2017/pipeline.yaml index 1e365c9e7..d19f5d0ab 100644 --- a/datasets/irs_990/irs_990_2017/pipeline.yaml +++ b/datasets/irs_990/irs_990_2017/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_2017 # Description of the table - description: "irs_990_2017 dataset" + description: "IRS 990 2017 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -104,751 +104,746 @@ dag: # https://cloud.google.com/bigquery/docs/schemas. # Always use snake_case and lowercase for column names, and be explicit, # i.e. specify modes for all columns. 
- + schema_fields: - - name : "ein" - type : "string" - mode : "required" - - name : "elf" - type : "string" - mode : "nullable" - - name : "tax_pd" - type : "integer" - mode : "nullable" - - name : "subseccd" - type : "integer" - mode : "nullable" - - name : "s501c3or4947a1cd" - type : "string" - mode : "nullable" - - name : "schdbind" - type : "string" - mode : "nullable" - - name : "politicalactvtscd" - type : "string" - mode : "nullable" - - name : "lbbyingactvtscd" - type : "string" - mode : "nullable" - - name : "subjto6033cd" - type : "string" - mode : "nullable" - - name : "dnradvisedfundscd" - type : "string" - mode : "nullable" - - name : "prptyintrcvdcd" - type : "string" - mode : "nullable" - - name : "maintwrkofartcd" - type : "string" - mode : "nullable" - - name : "crcounselingqstncd" - type : "string" - mode : "nullable" - - name : "hldassetsintermpermcd" - type : "string" - mode : "nullable" - - name : "rptlndbldgeqptcd" - type : "string" - mode : "nullable" - - name : "rptinvstothsecd" - type : "string" - mode : "nullable" - - name : "rptinvstprgrelcd" - type : "string" - mode : "nullable" - - name : "rptothasstcd" - type : "string" - mode : "nullable" - - name : "rptothliabcd" - type : "string" - mode : "nullable" - - name : "sepcnsldtfinstmtcd" - type : "string" - mode : "nullable" - - name : "sepindaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "inclinfinstmtcd" - type : "string" - mode : "nullable" - - name : "operateschools170cd" - type : "string" - mode : "nullable" - - name : "frgnofficecd" - type : "string" - mode : "nullable" - - name : "frgnrevexpnscd" - type : "string" - mode : "nullable" - - name : "frgngrntscd" - type : "string" - mode : "nullable" - - name : "frgnaggragrntscd" - type : "string" - mode : "nullable" - - name : "rptprofndrsngfeescd" - type : "string" - mode : "nullable" - - name : "rptincfnndrsngcd" - type : "string" - mode : "nullable" - - name : "rptincgamingcd" - type : "string" - mode : "nullable" - 
- name : "operatehosptlcd" - type : "string" - mode : "nullable" - - name : "hospaudfinstmtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstogovtcd" - type : "string" - mode : "nullable" - - name : "rptgrntstoindvcd" - type : "string" - mode : "nullable" - - name : "rptyestocompnstncd" - type : "string" - mode : "nullable" - - name : "txexmptbndcd" - type : "string" - mode : "nullable" - - name : "invstproceedscd" - type : "string" - mode : "nullable" - - name : "maintescrwaccntcd" - type : "string" - mode : "nullable" - - name : "actonbehalfcd" - type : "string" - mode : "nullable" - - name : "engageexcessbnftcd" - type : "string" - mode : "nullable" - - name : "awarexcessbnftcd" - type : "string" - mode : "nullable" - - name : "loantofficercd" - type : "string" - mode : "nullable" - - name : "grantoofficercd" - type : "string" - mode : "nullable" - - name : "dirbusnreltdcd" - type : "string" - mode : "nullable" - - name : "fmlybusnreltdcd" - type : "string" - mode : "nullable" - - name : "servasofficercd" - type : "string" - mode : "nullable" - - name : "recvnoncashcd" - type : "string" - mode : "nullable" - - name : "recvartcd" - type : "string" - mode : "nullable" - - name : "ceaseoperationscd" - type : "string" - mode : "nullable" - - name : "sellorexchcd" - type : "string" - mode : "nullable" - - name : "ownsepentcd" - type : "string" - mode : "nullable" - - name : "reltdorgcd" - type : "string" - mode : "nullable" - - name : "intincntrlcd" - type : "string" - mode : "nullable" - - name : "orgtrnsfrcd" - type : "string" - mode : "nullable" - - name : "conduct5percentcd" - type : "string" - mode : "nullable" - - name : "compltschocd" - type : "string" - mode : "nullable" - - name : "f1096cnt" - type : "integer" - mode : "nullable" - - name : "fw2gcnt" - type : "integer" - mode : "nullable" - - name : "wthldngrulescd" - type : "string" - mode : "nullable" - - name : "noemplyeesw3cnt" - type : "integer" - mode : "nullable" - - name : 
"filerqrdrtnscd" - type : "string" - mode : "nullable" - - name : "unrelbusinccd" - type : "string" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - mode : "nullable" - - name : "frgnacctcd" - type : "string" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" - mode : "nullable" - - name : "prtynotifyorgcd" - type : "string" - mode : "nullable" - - name : "filedf8886tcd" - type : "string" - mode : "nullable" - - name : "solicitcntrbcd" - type : "string" - mode : "nullable" - - name : "exprstmntcd" - type : "string" - mode : "nullable" - - name : "providegoodscd" - type : "string" - mode : "nullable" - - name : "notfydnrvalcd" - type : "string" - mode : "nullable" - - name : "filedf8282cd" - type : "string" - mode : "nullable" - - name : "f8282cnt" - type : "integer" - mode : "nullable" - - name : "fndsrcvdcd" - type : "string" - mode : "nullable" - - name : "premiumspaidcd" - type : "string" - mode : "nullable" - - name : "filedf8899cd" - type : "string" - mode : "nullable" - - name : "filedf1098ccd" - type : "string" - mode : "nullable" - - name : "excbushldngscd" - type : "string" - mode : "nullable" - - name : "s4966distribcd" - type : "string" - mode : "nullable" - - name : "distribtodonorcd" - type : "string" - mode : "nullable" - - name : "initiationfees" - type : "integer" - mode : "nullable" - - name : "grsrcptspublicuse" - type : "integer" - mode : "nullable" - - name : "grsincmembers" - type : "integer" - mode : "nullable" - - name : "grsincother" - type : "integer" - mode : "nullable" - - name : "filedlieuf1041cd" - type : "string" - mode : "nullable" - - name : "txexmptint" - type : "integer" - mode : "nullable" - - name : "qualhlthplncd" - type : "string" - mode : "nullable" - - name : "qualhlthreqmntn" - type : "integer" - mode : "nullable" - - name : "qualhlthonhnd" - type : "integer" - mode : "nullable" - - name : "rcvdpdtngcd" - type : "string" - mode : "nullable" - - name : "filedf720cd" - type : "string" - mode 
: "nullable" - - name : "totreprtabled" - type : "integer" - mode : "nullable" - - name : "totcomprelatede" - type : "integer" - mode : "nullable" - - name : "totestcompf" - type : "integer" - mode : "nullable" - - name : "noindiv100kcnt" - type : "integer" - mode : "nullable" - - name : "nocontractor100kcnt" - type : "integer" - mode : "nullable" - - name : "totcntrbgfts" - type : "integer" - mode : "nullable" - - name : "prgmservcode2acd" - type : "integer" - mode : "nullable" - - name : "totrev2acola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2bcd" - type : "integer" - mode : "nullable" - - name : "totrev2bcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ccd" - type : "integer" - mode : "nullable" - - name : "totrev2ccola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2dcd" - type : "integer" - mode : "nullable" - - name : "totrev2dcola" - type : "integer" - mode : "nullable" - - name : "prgmservcode2ecd" - type : "integer" - mode : "nullable" - - name : "totrev2ecola" - type : "integer" - mode : "nullable" - - name : "totrev2fcola" - type : "integer" - mode : "nullable" - - name : "totprgmrevnue" - type : "integer" - mode : "nullable" - - name : "invstmntinc" - type : "integer" - mode : "nullable" - - name : "txexmptbndsproceeds" - type : "integer" - mode : "nullable" - - name : "royaltsinc" - type : "integer" - mode : "nullable" - - name : "grsrntsreal" - type : "integer" - mode : "nullable" - - name : "grsrntsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlexpnsreal" - type : "integer" - mode : "nullable" - - name : "rntlexpnsprsnl" - type : "integer" - mode : "nullable" - - name : "rntlincreal" - type : "integer" - mode : "nullable" - - name : "rntlincprsnl" - type : "integer" - mode : "nullable" - - name : "netrntlinc" - type : "integer" - mode : "nullable" - - name : "grsalesecur" - type : "integer" - mode : "nullable" - - name : "grsalesothr" - type : "integer" - mode : "nullable" - 
- name : "cstbasisecur" - type : "integer" - mode : "nullable" - - name : "cstbasisothr" - type : "integer" - mode : "nullable" - - name : "gnlsecur" - type : "integer" - mode : "nullable" - - name : "gnlsothr" - type : "integer" - mode : "nullable" - - name : "netgnls" - type : "integer" - mode : "nullable" - - name : "grsincfndrsng" - type : "integer" - mode : "nullable" - - name : "lessdirfndrsng" - type : "integer" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" - mode : "nullable" - - name : "grsincgaming" - type : "integer" - mode : "nullable" - - name : "lessdirgaming" - type : "integer" - mode : "nullable" - - name : "netincgaming" - type : "integer" - mode : "nullable" - - name : "grsalesinvent" - type : "integer" - mode : "nullable" - - name : "lesscstofgoods" - type : "integer" - mode : "nullable" - - name : "netincsales" - type : "integer" - mode : "nullable" - - name : "miscrev11acd" - type : "integer" - mode : "nullable" - - name : "miscrevtota" - type : "integer" - mode : "nullable" - - name : "miscrev11bcd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11b" - type : "integer" - mode : "nullable" - - name : "miscrev11ccd" - type : "integer" - mode : "nullable" - - name : "miscrevtot11c" - type : "integer" - mode : "nullable" - - name : "miscrevtot11d" - type : "integer" - mode : "nullable" - - name : "miscrevtot11e" - type : "integer" - mode : "nullable" - - name : "totrevenue" - type : "integer" - mode : "nullable" - - name : "grntstogovt" - type : "integer" - mode : "nullable" - - name : "grnsttoindiv" - type : "integer" - mode : "nullable" - - name : "grntstofrgngovt" - type : "integer" - mode : "nullable" - - name : "benifitsmembrs" - type : "integer" - mode : "nullable" - - name : "compnsatncurrofcr" - type : "integer" - mode : "nullable" - - name : "compnsatnandothr" - type : "integer" - mode : "nullable" - - name : "othrsalwages" - type : "integer" - mode : "nullable" - - name : "pensionplancontrb" - type : 
"integer" - mode : "nullable" - - name : "othremplyeebenef" - type : "integer" - mode : "nullable" - - name : "payrolltx" - type : "integer" - mode : "nullable" - - name : "feesforsrvcmgmt" - type : "integer" - mode : "nullable" - - name : "legalfees" - type : "integer" - mode : "nullable" - - name : "accntingfees" - type : "integer" - mode : "nullable" - - name : "feesforsrvclobby" - type : "integer" - mode : "nullable" - - name : "profndraising" - type : "integer" - mode : "nullable" - - name : "feesforsrvcinvstmgmt" - type : "integer" - mode : "nullable" - - name : "feesforsrvcothr" - type : "integer" - mode : "nullable" - - name : "advrtpromo" - type : "integer" - mode : "nullable" - - name : "officexpns" - type : "integer" - mode : "nullable" - - name : "infotech" - type : "integer" - mode : "nullable" - - name : "royaltsexpns" - type : "integer" - mode : "nullable" - - name : "occupancy" - type : "integer" - mode : "nullable" - - name : "travel" - type : "integer" - mode : "nullable" - - name : "travelofpublicoffcl" - type : "integer" - mode : "nullable" - - name : "converconventmtng" - type : "integer" - mode : "nullable" - - name : "interestamt" - type : "integer" - mode : "nullable" - - name : "pymtoaffiliates" - type : "integer" - mode : "nullable" - - name : "deprcatndepletn" - type : "integer" - mode : "nullable" - - name : "insurance" - type : "integer" - mode : "nullable" - - name : "othrexpnsa" - type : "integer" - mode : "nullable" - - name : "othrexpnsb" - type : "integer" - mode : "nullable" - - name : "othrexpnsc" - type : "integer" - mode : "nullable" - - name : "othrexpnsd" - type : "integer" - mode : "nullable" - - name : "othrexpnse" - type : "integer" - mode : "nullable" - - name : "othrexpnsf" - type : "integer" - mode : "nullable" - - name : "totfuncexpns" - type : "integer" - mode : "nullable" - - name : "nonintcashend" - type : "integer" - mode : "nullable" - - name : "svngstempinvend" - type : "integer" - mode : "nullable" - - name : 
"pldgegrntrcvblend" - type : "integer" - mode : "nullable" - - name : "accntsrcvblend" - type : "integer" - mode : "nullable" - - name : "currfrmrcvblend" - type : "integer" - mode : "nullable" - - name : "rcvbldisqualend" - type : "integer" - mode : "nullable" - - name : "notesloansrcvblend" - type : "integer" - mode : "nullable" - - name : "invntriesalesend" - type : "integer" - mode : "nullable" - - name : "prepaidexpnsend" - type : "integer" - mode : "nullable" - - name : "lndbldgsequipend" - type : "integer" - mode : "nullable" - - name : "invstmntsend" - type : "integer" - mode : "nullable" - - name : "invstmntsothrend" - type : "integer" - mode : "nullable" - - name : "invstmntsprgmend" - type : "integer" - mode : "nullable" - - name : "intangibleassetsend" - type : "integer" - mode : "nullable" - - name : "othrassetsend" - type : "integer" - mode : "nullable" - - name : "totassetsend" - type : "integer" - mode : "nullable" - - name : "accntspayableend" - type : "integer" - mode : "nullable" - - name : "grntspayableend" - type : "integer" - mode : "nullable" - - name : "deferedrevnuend" - type : "integer" - mode : "nullable" - - name : "txexmptbndsend" - type : "integer" - mode : "nullable" - - name : "escrwaccntliabend" - type : "integer" - mode : "nullable" - - name : "paybletoffcrsend" - type : "integer" - mode : "nullable" - - name : "secrdmrtgsend" - type : "integer" - mode : "nullable" - - name : "unsecurednotesend" - type : "integer" - mode : "nullable" - - name : "othrliabend" - type : "integer" - mode : "nullable" - - name : "totliabend" - type : "integer" - mode : "nullable" - - name : "unrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "temprstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "permrstrctnetasstsend" - type : "integer" - mode : "nullable" - - name : "capitalstktrstend" - type : "integer" - mode : "nullable" - - name : "paidinsurplusend" - type : "integer" - mode : "nullable" - - name : 
"retainedearnend" - type : "integer" - mode : "nullable" - - name : "totnetassetend" - type : "integer" - mode : "nullable" - - name : "totnetliabastend" - type : "integer" - mode : "nullable" - - name : "nonpfrea" - type : "integer" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" - mode : "nullable" - - name : "totsupport" - type : "integer" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" - mode : "nullable" - - name : "srvcsval170" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" - mode : "nullable" - - name : "grsinc170" - type : "integer" - mode : "nullable" - - name : "netincunreltd170" - type : "integer" - mode : "nullable" - - name : "othrinc170" - type : "integer" - mode : "nullable" - - name : "totsupp170" - type : "integer" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" - mode : "nullable" - - name : "grsrcptsactivities509" - type : "integer" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" - mode : "nullable" - - name : "srvcsval509" - type : "integer" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" - mode : "nullable" - - name : "exceeds1pct509" - type : "integer" - mode : "nullable" - - name : "subtotpub509" - type : "integer" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" - mode : "nullable" - - name : "grsinc509" - type : "integer" 
- mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" - mode : "nullable" - - name : "othrinc509" - type : "integer" - mode : "nullable" - - name : "totsupp509" - type : "integer" - mode : "nullable" + - name: "ein" + type: "string" + mode: "required" + - name: "elf" + type: "string" + mode: "nullable" + - name: "tax_pd" + type: "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" + mode: "nullable" + - name: "s501c3or4947a1cd" + type: "string" + mode: "nullable" + - name: "schdbind" + type: "string" + mode: "nullable" + - name: "politicalactvtscd" + type: "string" + mode: "nullable" + - name: "lbbyingactvtscd" + type: "string" + mode: "nullable" + - name: "subjto6033cd" + type: "string" + mode: "nullable" + - name: "dnradvisedfundscd" + type: "string" + mode: "nullable" + - name: "prptyintrcvdcd" + type: "string" + mode: "nullable" + - name: "maintwrkofartcd" + type: "string" + mode: "nullable" + - name: "crcounselingqstncd" + type: "string" + mode: "nullable" + - name: "hldassetsintermpermcd" + type: "string" + mode: "nullable" + - name: "rptlndbldgeqptcd" + type: "string" + mode: "nullable" + - name: "rptinvstothsecd" + type: "string" + mode: "nullable" + - name: "rptinvstprgrelcd" + type: "string" + mode: "nullable" + - name: "rptothasstcd" + type: "string" + mode: "nullable" + - name: "rptothliabcd" + type: "string" + mode: "nullable" + - name: "sepcnsldtfinstmtcd" + type: "string" + mode: "nullable" + - name: "sepindaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "inclinfinstmtcd" + type: "string" + mode: "nullable" + - name: "operateschools170cd" + type: "string" + mode: "nullable" + - name: "frgnofficecd" + type: "string" + mode: "nullable" + - name: "frgnrevexpnscd" + type: "string" + mode: "nullable" + - name: "frgngrntscd" + type: "string" + mode: "nullable" + - name: 
"frgnaggragrntscd" + type: "string" + mode: "nullable" + - name: "rptprofndrsngfeescd" + type: "string" + mode: "nullable" + - name: "rptincfnndrsngcd" + type: "string" + mode: "nullable" + - name: "rptincgamingcd" + type: "string" + mode: "nullable" + - name: "operatehosptlcd" + type: "string" + mode: "nullable" + - name: "hospaudfinstmtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstogovtcd" + type: "string" + mode: "nullable" + - name: "rptgrntstoindvcd" + type: "string" + mode: "nullable" + - name: "rptyestocompnstncd" + type: "string" + mode: "nullable" + - name: "txexmptbndcd" + type: "string" + mode: "nullable" + - name: "invstproceedscd" + type: "string" + mode: "nullable" + - name: "maintescrwaccntcd" + type: "string" + mode: "nullable" + - name: "actonbehalfcd" + type: "string" + mode: "nullable" + - name: "engageexcessbnftcd" + type: "string" + mode: "nullable" + - name: "awarexcessbnftcd" + type: "string" + mode: "nullable" + - name: "loantofficercd" + type: "string" + mode: "nullable" + - name: "grantoofficercd" + type: "string" + mode: "nullable" + - name: "dirbusnreltdcd" + type: "string" + mode: "nullable" + - name: "fmlybusnreltdcd" + type: "string" + mode: "nullable" + - name: "servasofficercd" + type: "string" + mode: "nullable" + - name: "recvnoncashcd" + type: "string" + mode: "nullable" + - name: "recvartcd" + type: "string" + mode: "nullable" + - name: "ceaseoperationscd" + type: "string" + mode: "nullable" + - name: "sellorexchcd" + type: "string" + mode: "nullable" + - name: "ownsepentcd" + type: "string" + mode: "nullable" + - name: "reltdorgcd" + type: "string" + mode: "nullable" + - name: "intincntrlcd" + type: "string" + mode: "nullable" + - name: "orgtrnsfrcd" + type: "string" + mode: "nullable" + - name: "conduct5percentcd" + type: "string" + mode: "nullable" + - name: "compltschocd" + type: "string" + mode: "nullable" + - name: "f1096cnt" + type: "integer" + mode: "nullable" + - name: "fw2gcnt" + type: "integer" + mode: 
"nullable" + - name: "wthldngrulescd" + type: "string" + mode: "nullable" + - name: "noemplyeesw3cnt" + type: "integer" + mode: "nullable" + - name: "filerqrdrtnscd" + type: "string" + mode: "nullable" + - name: "unrelbusinccd" + type: "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + mode: "nullable" + - name: "frgnacctcd" + type: "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" + mode: "nullable" + - name: "prtynotifyorgcd" + type: "string" + mode: "nullable" + - name: "filedf8886tcd" + type: "string" + mode: "nullable" + - name: "solicitcntrbcd" + type: "string" + mode: "nullable" + - name: "exprstmntcd" + type: "string" + mode: "nullable" + - name: "providegoodscd" + type: "string" + mode: "nullable" + - name: "notfydnrvalcd" + type: "string" + mode: "nullable" + - name: "filedf8282cd" + type: "string" + mode: "nullable" + - name: "f8282cnt" + type: "integer" + mode: "nullable" + - name: "fndsrcvdcd" + type: "string" + mode: "nullable" + - name: "premiumspaidcd" + type: "string" + mode: "nullable" + - name: "filedf8899cd" + type: "string" + mode: "nullable" + - name: "filedf1098ccd" + type: "string" + mode: "nullable" + - name: "excbushldngscd" + type: "string" + mode: "nullable" + - name: "s4966distribcd" + type: "string" + mode: "nullable" + - name: "distribtodonorcd" + type: "string" + mode: "nullable" + - name: "initiationfees" + type: "integer" + mode: "nullable" + - name: "grsrcptspublicuse" + type: "integer" + mode: "nullable" + - name: "grsincmembers" + type: "integer" + mode: "nullable" + - name: "grsincother" + type: "integer" + mode: "nullable" + - name: "filedlieuf1041cd" + type: "string" + mode: "nullable" + - name: "txexmptint" + type: "integer" + mode: "nullable" + - name: "qualhlthplncd" + type: "string" + mode: "nullable" + - name: "qualhlthreqmntn" + type: "integer" + mode: "nullable" + - name: "qualhlthonhnd" + type: "integer" + mode: "nullable" + - name: "rcvdpdtngcd" + type: "string" + mode: 
"nullable" + - name: "filedf720cd" + type: "string" + mode: "nullable" + - name: "totreprtabled" + type: "integer" + mode: "nullable" + - name: "totcomprelatede" + type: "integer" + mode: "nullable" + - name: "totestcompf" + type: "integer" + mode: "nullable" + - name: "noindiv100kcnt" + type: "integer" + mode: "nullable" + - name: "nocontractor100kcnt" + type: "integer" + mode: "nullable" + - name: "totcntrbgfts" + type: "integer" + mode: "nullable" + - name: "prgmservcode2acd" + type: "integer" + mode: "nullable" + - name: "totrev2acola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2bcd" + type: "integer" + mode: "nullable" + - name: "totrev2bcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ccd" + type: "integer" + mode: "nullable" + - name: "totrev2ccola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2dcd" + type: "integer" + mode: "nullable" + - name: "totrev2dcola" + type: "integer" + mode: "nullable" + - name: "prgmservcode2ecd" + type: "integer" + mode: "nullable" + - name: "totrev2ecola" + type: "integer" + mode: "nullable" + - name: "totrev2fcola" + type: "integer" + mode: "nullable" + - name: "totprgmrevnue" + type: "integer" + mode: "nullable" + - name: "invstmntinc" + type: "integer" + mode: "nullable" + - name: "txexmptbndsproceeds" + type: "integer" + mode: "nullable" + - name: "royaltsinc" + type: "integer" + mode: "nullable" + - name: "grsrntsreal" + type: "integer" + mode: "nullable" + - name: "grsrntsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlexpnsreal" + type: "integer" + mode: "nullable" + - name: "rntlexpnsprsnl" + type: "integer" + mode: "nullable" + - name: "rntlincreal" + type: "integer" + mode: "nullable" + - name: "rntlincprsnl" + type: "integer" + mode: "nullable" + - name: "netrntlinc" + type: "integer" + mode: "nullable" + - name: "grsalesecur" + type: "integer" + mode: "nullable" + - name: "grsalesothr" + type: "integer" + mode: "nullable" + - name: "cstbasisecur" + type: 
"integer" + mode: "nullable" + - name: "cstbasisothr" + type: "integer" + mode: "nullable" + - name: "gnlsecur" + type: "integer" + mode: "nullable" + - name: "gnlsothr" + type: "integer" + mode: "nullable" + - name: "netgnls" + type: "integer" + mode: "nullable" + - name: "grsincfndrsng" + type: "integer" + mode: "nullable" + - name: "lessdirfndrsng" + type: "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" + mode: "nullable" + - name: "lessdirgaming" + type: "integer" + mode: "nullable" + - name: "netincgaming" + type: "integer" + mode: "nullable" + - name: "grsalesinvent" + type: "integer" + mode: "nullable" + - name: "lesscstofgoods" + type: "integer" + mode: "nullable" + - name: "netincsales" + type: "integer" + mode: "nullable" + - name: "miscrev11acd" + type: "integer" + mode: "nullable" + - name: "miscrevtota" + type: "integer" + mode: "nullable" + - name: "miscrev11bcd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11b" + type: "integer" + mode: "nullable" + - name: "miscrev11ccd" + type: "integer" + mode: "nullable" + - name: "miscrevtot11c" + type: "integer" + mode: "nullable" + - name: "miscrevtot11d" + type: "integer" + mode: "nullable" + - name: "miscrevtot11e" + type: "integer" + mode: "nullable" + - name: "totrevenue" + type: "integer" + mode: "nullable" + - name: "grntstogovt" + type: "integer" + mode: "nullable" + - name: "grnsttoindiv" + type: "integer" + mode: "nullable" + - name: "grntstofrgngovt" + type: "integer" + mode: "nullable" + - name: "benifitsmembrs" + type: "integer" + mode: "nullable" + - name: "compnsatncurrofcr" + type: "integer" + mode: "nullable" + - name: "compnsatnandothr" + type: "integer" + mode: "nullable" + - name: "othrsalwages" + type: "integer" + mode: "nullable" + - name: "pensionplancontrb" + type: "integer" + mode: "nullable" + - name: "othremplyeebenef" + type: "integer" + mode: "nullable" + - name: "payrolltx" + type: 
"integer" + mode: "nullable" + - name: "feesforsrvcmgmt" + type: "integer" + mode: "nullable" + - name: "legalfees" + type: "integer" + mode: "nullable" + - name: "accntingfees" + type: "integer" + mode: "nullable" + - name: "feesforsrvclobby" + type: "integer" + mode: "nullable" + - name: "profndraising" + type: "integer" + mode: "nullable" + - name: "feesforsrvcinvstmgmt" + type: "integer" + mode: "nullable" + - name: "feesforsrvcothr" + type: "integer" + mode: "nullable" + - name: "advrtpromo" + type: "integer" + mode: "nullable" + - name: "officexpns" + type: "integer" + mode: "nullable" + - name: "infotech" + type: "integer" + mode: "nullable" + - name: "royaltsexpns" + type: "integer" + mode: "nullable" + - name: "occupancy" + type: "integer" + mode: "nullable" + - name: "travel" + type: "integer" + mode: "nullable" + - name: "travelofpublicoffcl" + type: "integer" + mode: "nullable" + - name: "converconventmtng" + type: "integer" + mode: "nullable" + - name: "interestamt" + type: "integer" + mode: "nullable" + - name: "pymtoaffiliates" + type: "integer" + mode: "nullable" + - name: "deprcatndepletn" + type: "integer" + mode: "nullable" + - name: "insurance" + type: "integer" + mode: "nullable" + - name: "othrexpnsa" + type: "integer" + mode: "nullable" + - name: "othrexpnsb" + type: "integer" + mode: "nullable" + - name: "othrexpnsc" + type: "integer" + mode: "nullable" + - name: "othrexpnsd" + type: "integer" + mode: "nullable" + - name: "othrexpnse" + type: "integer" + mode: "nullable" + - name: "othrexpnsf" + type: "integer" + mode: "nullable" + - name: "totfuncexpns" + type: "integer" + mode: "nullable" + - name: "nonintcashend" + type: "integer" + mode: "nullable" + - name: "svngstempinvend" + type: "integer" + mode: "nullable" + - name: "pldgegrntrcvblend" + type: "integer" + mode: "nullable" + - name: "accntsrcvblend" + type: "integer" + mode: "nullable" + - name: "currfrmrcvblend" + type: "integer" + mode: "nullable" + - name: "rcvbldisqualend" + 
type: "integer" + mode: "nullable" + - name: "notesloansrcvblend" + type: "integer" + mode: "nullable" + - name: "invntriesalesend" + type: "integer" + mode: "nullable" + - name: "prepaidexpnsend" + type: "integer" + mode: "nullable" + - name: "lndbldgsequipend" + type: "integer" + mode: "nullable" + - name: "invstmntsend" + type: "integer" + mode: "nullable" + - name: "invstmntsothrend" + type: "integer" + mode: "nullable" + - name: "invstmntsprgmend" + type: "integer" + mode: "nullable" + - name: "intangibleassetsend" + type: "integer" + mode: "nullable" + - name: "othrassetsend" + type: "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" + mode: "nullable" + - name: "accntspayableend" + type: "integer" + mode: "nullable" + - name: "grntspayableend" + type: "integer" + mode: "nullable" + - name: "deferedrevnuend" + type: "integer" + mode: "nullable" + - name: "txexmptbndsend" + type: "integer" + mode: "nullable" + - name: "escrwaccntliabend" + type: "integer" + mode: "nullable" + - name: "paybletoffcrsend" + type: "integer" + mode: "nullable" + - name: "secrdmrtgsend" + type: "integer" + mode: "nullable" + - name: "unsecurednotesend" + type: "integer" + mode: "nullable" + - name: "othrliabend" + type: "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" + mode: "nullable" + - name: "unrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "temprstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "permrstrctnetasstsend" + type: "integer" + mode: "nullable" + - name: "capitalstktrstend" + type: "integer" + mode: "nullable" + - name: "paidinsurplusend" + type: "integer" + mode: "nullable" + - name: "retainedearnend" + type: "integer" + mode: "nullable" + - name: "totnetassetend" + type: "integer" + mode: "nullable" + - name: "totnetliabastend" + type: "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" + mode: "nullable" + - 
name: "totsupport" + type: "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" + mode: "nullable" graph_paths: - "irs_990_2017_transform_csv >> 
load_irs_990_2017_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2014/pipeline.yaml b/datasets/irs_990/irs_990_ez_2014/pipeline.yaml index e0dc5c342..f12770df9 100644 --- a/datasets/irs_990/irs_990_ez_2014/pipeline.yaml +++ b/datasets/irs_990/irs_990_ez_2014/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_ez_2014 # Description of the table - description: "irs_990_ez_2014 dataset" + description: "IRS 990 EZ 2014 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -107,296 +107,291 @@ dag: # i.e. specify modes for all columns. schema_fields: - - name : "ein" - type : "string" + - name: "ein" + type: "string" description: "Employer Identification Number" - mode : "required" - - name : "tax_pd" - type : "integer" + mode: "required" + - name: "tax_pd" + type: "integer" description: "Tax period" - mode : "nullable" - - name : "subseccd" - type : "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" description: "Subsection code" - mode : "nullable" - - name : "totcntrbs" - type : "integer" + mode: "nullable" + - name: "totcntrbs" + type: "integer" description: "Contributions gifts grants etc received" - mode : "nullable" - - name : "prgmservrev" - type : "integer" + mode: "nullable" + - name: "prgmservrev" + type: "integer" description: "Program service revenue" - mode : "nullable" - - name : "duesassesmnts" - type : "integer" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" description: "Membership dues and assessments" - mode : "nullable" - - name : "othrinvstinc" - type : "integer" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" description: "Investment income" - mode : "nullable" - - name : "grsamtsalesastothr" - type : "integer" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" description: 
"Gross amount from sale of assets" - mode : "nullable" - - name : "basisalesexpnsothr" - type : "integer" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" description: "Cost or other basis and sales expenses" - mode : "nullable" - - name : "gnsaleofastothr" - type : "integer" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" description: "Gain or (loss) from sale of assets" - mode : "nullable" - - name : "grsincgaming" - type : "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" description: "Gross income from gaming" - mode : "nullable" - - name : "grsrevnuefndrsng" - type : "integer" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" description: "Special events gross revenue" - mode : "nullable" - - name : "direxpns" - type : "integer" + mode: "nullable" + - name: "direxpns" + type: "integer" description: "Special events direct expenses" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" description: "Special events net income (or loss)" - mode : "nullable" - - name : "grsalesminusret" - type : "integer" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" description: "Gross sales of inventory" - mode : "nullable" - - name : "costgoodsold" - type : "integer" + mode: "nullable" + - name: "costgoodsold" + type: "integer" description: "Less: cost of goods sold" - mode : "nullable" - - name : "grsprft" - type : "integer" + mode: "nullable" + - name: "grsprft" + type: "integer" description: "Gross profit (or loss) from sales of inventory" - mode : "nullable" - - name : "othrevnue" - type : "integer" + mode: "nullable" + - name: "othrevnue" + type: "integer" description: "Other revenue - total" - mode : "nullable" - - name : "totrevnue" - type : "integer" + mode: "nullable" + - name: "totrevnue" + type: "integer" description: "Total revenue" - mode : "nullable" - - name : "totexpns" - type : "integer" + mode: 
"nullable" + - name: "totexpns" + type: "integer" description: "Total expenses" - mode : "nullable" - - name : "totexcessyr" - type : "integer" + mode: "nullable" + - name: "totexcessyr" + type: "integer" description: "Excess or deficit" - mode : "nullable" - - name : "othrchgsnetassetfnd" - type : "integer" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" description: "Other changes in net assets" - mode : "nullable" - - name : "networthend" - type : "integer" + mode: "nullable" + - name: "networthend" + type: "integer" description: "Net assets EOY" - mode : "nullable" - - name : "totassetsend" - type : "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" description: "Total assets e-o-y" - mode : "nullable" - - name : "totliabend" - type : "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" description: "Total liabilities e-o-y" - mode : "nullable" - - name : "totnetassetsend" - type : "integer" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" description: "Total net worth e-o-y" - mode : "nullable" - - name : "actvtynotprevrptcd" - type : "string" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" description: "Activity not previously reported?" - mode : "nullable" - - name : "chngsinorgcd" - type : "string" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" description: "Significant changes to governing docs?" - mode : "nullable" - - name : "unrelbusincd" - type : "string" + mode: "nullable" + - name: "unrelbusincd" + type: "string" description: "UBI over $1000?" 
- mode : "nullable" - - name : "filedf990tcd" - type : "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" description: "Organization Filed 990T" - mode : "nullable" - - name : "contractioncd" - type : "string" + mode: "nullable" + - name: "contractioncd" + type: "string" description: "Liquidation dissolution termination or contraction" - mode : "nullable" - - name : "politicalexpend" - type : "integer" + mode: "nullable" + - name: "politicalexpend" + type: "integer" description: "Direct or indirect political expenditures" - mode : "nullable" - - name : "filedf1120polcd" - type : "string" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" description: "File Form 1120-POL?" - mode : "nullable" - - name : "loanstoofficerscd" - type : "string" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" description: "Loans to/from officers directors or trustees?" - mode : "nullable" - - name : "loanstoofficers" - type : "integer" + mode: "nullable" + - name: "loanstoofficers" + type: "integer" description: "Amount of loans to/from officers" - mode : "nullable" - - name : "initiationfee" - type : "integer" + mode: "nullable" + - name: "initiationfee" + type: "integer" description: "Initiation fees and capital contributions" - mode : "nullable" - - name : "grspublicrcpts" - type : "integer" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" description: "Gross receipts for public use of club facilities" - mode : "nullable" - - name : "s4958excessbenefcd" - type : "string" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" description: "Section 4958 excess benefit transactions?" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" description: "Party to a prohibited tax shelter transaction?" 
- mode : "nullable" - - name : "nonpfrea" - type : "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" description: "Reason for non-PF status" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" description: "Number of organizations supported" - mode : "nullable" - - name : "totsupport" - type : "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" description: "Sum of amounts of support" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" description: "Gifts grants membership fees received (170)" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" description: "Tax revenues levied (170)" - mode : "nullable" - - name : "srvcsval170" - type : "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" description: "Services or facilities furnished by gov (170)" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" description: "Public support subtotal (170)" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" description: "Amount support exceeds total (170)" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" description: "Public support (170)" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" description: "Public support from line 4 (170)" - mode : "nullable" - - name : "grsinc170" - type : "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" description: "Gross income from interest etc (170)" - mode : "nullable" - - name : 
"netincunreltd170" - type : "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" description: "Net UBI (170)" - mode : "nullable" - - name : "othrinc170" - type : "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" description: "Other income (170)" - mode : "nullable" - - name : "totsupp170" - type : "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" description: "Total support (170)" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" description: "Gross receipts from related activities (170)" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" description: "Gifts grants membership fees received (509)" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" description: "Receipts from admissions merchandise etc (509)" - mode : "nullable" - - name : "grsrcptsactivities509" - type : "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" description: "Gross receipts from related activities (509)" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" description: "Tax revenues levied (509)" - mode : "nullable" - - name : "srvcsval509" - type : "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" description: "Services or facilities furnished by gov (509)" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" description: "Amounts from disqualified persons (509)" - mode : 
"nullable" - - name : "exceeds1pct509" - type : "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" description: "Amount support exceeds total (509)" - mode : "nullable" - - name : "subtotpub509" - type : "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" description: "Public support (509)" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" description: "Public support from line 6 (509)" - mode : "nullable" - - name : "grsinc509" - type : "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" description: "Gross income from interest etc (509)" - mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" description: "Net UBI (509)" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" description: "Subtotal total support (509)" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" description: "Net income from UBI not in 10b (509)" - mode : "nullable" - - name : "othrinc509" - type : "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" description: "Other income (509)" - mode : "nullable" - - name : "totsupp509" - type : "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" description: "Total support (509)" - mode : "nullable" + mode: "nullable" graph_paths: - "irs_990_ez_2014_transform_csv >> load_irs_990_ez_2014_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2015/pipeline.yaml b/datasets/irs_990/irs_990_ez_2015/pipeline.yaml 
index b5ef42660..aeb6fccce 100644 --- a/datasets/irs_990/irs_990_ez_2015/pipeline.yaml +++ b/datasets/irs_990/irs_990_ez_2015/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_ez_2015 # Description of the table - description: "irs_990_ez_2015 dataset" + description: "IRS 990 EZ 2015 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -107,300 +107,295 @@ dag: # i.e. specify modes for all columns. schema_fields: - - name : "ein" - type : "string" + - name: "ein" + type: "string" description: "Employer Identification Number" - mode : "required" - - name : "elf" - type : "string" + mode: "required" + - name: "elf" + type: "string" description: "E-file indicator" - mode : "nullable" - - name : "tax_pd" - type : "integer" + mode: "nullable" + - name: "tax_pd" + type: "integer" description: "Tax period" - mode : "nullable" - - name : "subseccd" - type : "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" description: "Subsection code" - mode : "nullable" - - name : "totcntrbs" - type : "integer" + mode: "nullable" + - name: "totcntrbs" + type: "integer" description: "Contributions gifts grants etc received" - mode : "nullable" - - name : "prgmservrev" - type : "integer" + mode: "nullable" + - name: "prgmservrev" + type: "integer" description: "Program service revenue" - mode : "nullable" - - name : "duesassesmnts" - type : "integer" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" description: "Membership dues and assessments" - mode : "nullable" - - name : "othrinvstinc" - type : "integer" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" description: "Investment income" - mode : "nullable" - - name : "grsamtsalesastothr" - type : "integer" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" description: "Gross amount from sale of 
assets" - mode : "nullable" - - name : "basisalesexpnsothr" - type : "integer" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" description: "Cost or other basis and sales expenses" - mode : "nullable" - - name : "gnsaleofastothr" - type : "integer" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" description: "Gain or (loss) from sale of assets" - mode : "nullable" - - name : "grsincgaming" - type : "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" description: "Gross income from gaming" - mode : "nullable" - - name : "grsrevnuefndrsng" - type : "integer" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" description: "Special events gross revenue" - mode : "nullable" - - name : "direxpns" - type : "integer" + mode: "nullable" + - name: "direxpns" + type: "integer" description: "Special events direct expenses" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" description: "Special events net income (or loss)" - mode : "nullable" - - name : "grsalesminusret" - type : "integer" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" description: "Gross sales of inventory" - mode : "nullable" - - name : "costgoodsold" - type : "integer" + mode: "nullable" + - name: "costgoodsold" + type: "integer" description: "Less: cost of goods sold" - mode : "nullable" - - name : "grsprft" - type : "integer" + mode: "nullable" + - name: "grsprft" + type: "integer" description: "Gross profit (or loss) from sales of inventory" - mode : "nullable" - - name : "othrevnue" - type : "integer" + mode: "nullable" + - name: "othrevnue" + type: "integer" description: "Other revenue - total" - mode : "nullable" - - name : "totrevnue" - type : "integer" + mode: "nullable" + - name: "totrevnue" + type: "integer" description: "Total revenue" - mode : "nullable" - - name : "totexpns" - type : "integer" + mode: "nullable" + - name: 
"totexpns" + type: "integer" description: "Total expenses" - mode : "nullable" - - name : "totexcessyr" - type : "integer" + mode: "nullable" + - name: "totexcessyr" + type: "integer" description: "Excess or deficit" - mode : "nullable" - - name : "othrchgsnetassetfnd" - type : "integer" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" description: "Other changes in net assets" - mode : "nullable" - - name : "networthend" - type : "integer" + mode: "nullable" + - name: "networthend" + type: "integer" description: "Net assets EOY" - mode : "nullable" - - name : "totassetsend" - type : "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" description: "Total assets e-o-y" - mode : "nullable" - - name : "totliabend" - type : "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" description: "Total liabilities e-o-y" - mode : "nullable" - - name : "totnetassetsend" - type : "integer" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" description: "Total net worth e-o-y" - mode : "nullable" - - name : "actvtynotprevrptcd" - type : "string" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" description: "Activity not previously reported?" - mode : "nullable" - - name : "chngsinorgcd" - type : "string" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" description: "Significant changes to governing docs?" - mode : "nullable" - - name : "unrelbusincd" - type : "string" + mode: "nullable" + - name: "unrelbusincd" + type: "string" description: "UBI over $1000?" 
- mode : "nullable" - - name : "filedf990tcd" - type : "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" description: "Organization Filed 990T" - mode : "nullable" - - name : "contractioncd" - type : "string" + mode: "nullable" + - name: "contractioncd" + type: "string" description: "Liquidation dissolution termination or contraction" - mode : "nullable" - - name : "politicalexpend" - type : "integer" + mode: "nullable" + - name: "politicalexpend" + type: "integer" description: "Direct or indirect political expenditures" - mode : "nullable" - - name : "filedf1120polcd" - type : "string" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" description: "File Form 1120-POL?" - mode : "nullable" - - name : "loanstoofficerscd" - type : "string" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" description: "Loans to/from officers directors or trustees?" - mode : "nullable" - - name : "loanstoofficers" - type : "integer" + mode: "nullable" + - name: "loanstoofficers" + type: "integer" description: "Amount of loans to/from officers" - mode : "nullable" - - name : "initiationfee" - type : "integer" + mode: "nullable" + - name: "initiationfee" + type: "integer" description: "Initiation fees and capital contributions" - mode : "nullable" - - name : "grspublicrcpts" - type : "integer" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" description: "Gross receipts for public use of club facilities" - mode : "nullable" - - name : "s4958excessbenefcd" - type : "string" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" description: "Section 4958 excess benefit transactions?" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" description: "Party to a prohibited tax shelter transaction?" 
- mode : "nullable" - - name : "nonpfrea" - type : "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" description: "Reason for non-PF status" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" description: "Number of organizations supported" - mode : "nullable" - - name : "totsupport" - type : "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" description: "Sum of amounts of support" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" description: "Gifts grants membership fees received (170)" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" description: "Tax revenues levied (170)" - mode : "nullable" - - name : "srvcsval170" - type : "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" description: "Services or facilities furnished by gov (170)" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" description: "Public support subtotal (170)" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" description: "Amount support exceeds total (170)" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" description: "Public support (170)" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" description: "Public support from line 4 (170)" - mode : "nullable" - - name : "grsinc170" - type : "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" description: "Gross income from interest etc (170)" - mode : "nullable" - - name : 
"netincunreltd170" - type : "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" description: "Net UBI (170)" - mode : "nullable" - - name : "othrinc170" - type : "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" description: "Other income (170)" - mode : "nullable" - - name : "totsupp170" - type : "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" description: "Total support (170)" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" description: "Gross receipts from related activities (170)" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" description: "Gifts grants membership fees received (509)" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" description: "Receipts from admissions merchandise etc (509)" - mode : "nullable" - - name : "grsrcptsactivities509" - type : "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" description: "Gross receipts from related activities (509)" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" description: "Tax revenues levied (509)" - mode : "nullable" - - name : "srvcsval509" - type : "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" description: "Services or facilities furnished by gov (509)" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" description: "Amounts from disqualified persons (509)" - mode : 
"nullable" - - name : "exceeds1pct509" - type : "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" description: "Amount support exceeds total (509)" - mode : "nullable" - - name : "subtotpub509" - type : "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" description: "Public support (509)" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" description: "Public support from line 6 (509)" - mode : "nullable" - - name : "grsinc509" - type : "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" description: "Gross income from interest etc (509)" - mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" description: "Net UBI (509)" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" description: "Subtotal total support (509)" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" description: "Net income from UBI not in 10b (509)" - mode : "nullable" - - name : "othrinc509" - type : "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" description: "Other income (509)" - mode : "nullable" - - name : "totsupp509" - type : "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" description: "Total support (509)" - mode : "nullable" + mode: "nullable" graph_paths: - "irs_990_ez_2015_transform_csv >> load_irs_990_ez_2015_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2016/pipeline.yaml b/datasets/irs_990/irs_990_ez_2016/pipeline.yaml 
index 7882ff022..3005403da 100644 --- a/datasets/irs_990/irs_990_ez_2016/pipeline.yaml +++ b/datasets/irs_990/irs_990_ez_2016/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_ez_2016 # Description of the table - description: "irs_990_ez_2016 dataset" + description: "IRS 990 EZ 2016 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -107,300 +107,295 @@ dag: # i.e. specify modes for all columns. schema_fields: - - name : "ein" - type : "string" + - name: "ein" + type: "string" description: "Employer Identification Number" - mode : "required" - - name : "elf" - type : "string" + mode: "required" + - name: "elf" + type: "string" description: "E-file indicator" - mode : "nullable" - - name : "tax_pd" - type : "integer" + mode: "nullable" + - name: "tax_pd" + type: "integer" description: "Tax period" - mode : "nullable" - - name : "subseccd" - type : "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" description: "Subsection code" - mode : "nullable" - - name : "totcntrbs" - type : "integer" + mode: "nullable" + - name: "totcntrbs" + type: "integer" description: "Contributions gifts grants etc received" - mode : "nullable" - - name : "prgmservrev" - type : "integer" + mode: "nullable" + - name: "prgmservrev" + type: "integer" description: "Program service revenue" - mode : "nullable" - - name : "duesassesmnts" - type : "integer" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" description: "Membership dues and assessments" - mode : "nullable" - - name : "othrinvstinc" - type : "integer" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" description: "Investment income" - mode : "nullable" - - name : "grsamtsalesastothr" - type : "integer" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" description: "Gross amount from sale of 
assets" - mode : "nullable" - - name : "basisalesexpnsothr" - type : "integer" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" description: "Cost or other basis and sales expenses" - mode : "nullable" - - name : "gnsaleofastothr" - type : "integer" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" description: "Gain or (loss) from sale of assets" - mode : "nullable" - - name : "grsincgaming" - type : "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" description: "Gross income from gaming" - mode : "nullable" - - name : "grsrevnuefndrsng" - type : "integer" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" description: "Special events gross revenue" - mode : "nullable" - - name : "direxpns" - type : "integer" + mode: "nullable" + - name: "direxpns" + type: "integer" description: "Special events direct expenses" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" description: "Special events net income (or loss)" - mode : "nullable" - - name : "grsalesminusret" - type : "integer" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" description: "Gross sales of inventory" - mode : "nullable" - - name : "costgoodsold" - type : "integer" + mode: "nullable" + - name: "costgoodsold" + type: "integer" description: "Less: cost of goods sold" - mode : "nullable" - - name : "grsprft" - type : "integer" + mode: "nullable" + - name: "grsprft" + type: "integer" description: "Gross profit (or loss) from sales of inventory" - mode : "nullable" - - name : "othrevnue" - type : "integer" + mode: "nullable" + - name: "othrevnue" + type: "integer" description: "Other revenue - total" - mode : "nullable" - - name : "totrevnue" - type : "integer" + mode: "nullable" + - name: "totrevnue" + type: "integer" description: "Total revenue" - mode : "nullable" - - name : "totexpns" - type : "integer" + mode: "nullable" + - name: 
"totexpns" + type: "integer" description: "Total expenses" - mode : "nullable" - - name : "totexcessyr" - type : "integer" + mode: "nullable" + - name: "totexcessyr" + type: "integer" description: "Excess or deficit" - mode : "nullable" - - name : "othrchgsnetassetfnd" - type : "integer" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" description: "Other changes in net assets" - mode : "nullable" - - name : "networthend" - type : "integer" + mode: "nullable" + - name: "networthend" + type: "integer" description: "Net assets EOY" - mode : "nullable" - - name : "totassetsend" - type : "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" description: "Total assets e-o-y" - mode : "nullable" - - name : "totliabend" - type : "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" description: "Total liabilities e-o-y" - mode : "nullable" - - name : "totnetassetsend" - type : "integer" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" description: "Total net worth e-o-y" - mode : "nullable" - - name : "actvtynotprevrptcd" - type : "string" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" description: "Activity not previously reported?" - mode : "nullable" - - name : "chngsinorgcd" - type : "string" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" description: "Significant changes to governing docs?" - mode : "nullable" - - name : "unrelbusincd" - type : "string" + mode: "nullable" + - name: "unrelbusincd" + type: "string" description: "UBI over $1000?" 
- mode : "nullable" - - name : "filedf990tcd" - type : "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" description: "Organization Filed 990T" - mode : "nullable" - - name : "contractioncd" - type : "string" + mode: "nullable" + - name: "contractioncd" + type: "string" description: "Liquidation dissolution termination or contraction" - mode : "nullable" - - name : "politicalexpend" - type : "integer" + mode: "nullable" + - name: "politicalexpend" + type: "integer" description: "Direct or indirect political expenditures" - mode : "nullable" - - name : "filedf1120polcd" - type : "string" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" description: "File Form 1120-POL?" - mode : "nullable" - - name : "loanstoofficerscd" - type : "string" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" description: "Loans to/from officers directors or trustees?" - mode : "nullable" - - name : "loanstoofficers" - type : "integer" + mode: "nullable" + - name: "loanstoofficers" + type: "integer" description: "Amount of loans to/from officers" - mode : "nullable" - - name : "initiationfee" - type : "integer" + mode: "nullable" + - name: "initiationfee" + type: "integer" description: "Initiation fees and capital contributions" - mode : "nullable" - - name : "grspublicrcpts" - type : "integer" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" description: "Gross receipts for public use of club facilities" - mode : "nullable" - - name : "s4958excessbenefcd" - type : "string" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" description: "Section 4958 excess benefit transactions?" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" description: "Party to a prohibited tax shelter transaction?" 
- mode : "nullable" - - name : "nonpfrea" - type : "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" description: "Reason for non-PF status" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" description: "Number of organizations supported" - mode : "nullable" - - name : "totsupport" - type : "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" description: "Sum of amounts of support" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" description: "Gifts grants membership fees received (170)" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" description: "Tax revenues levied (170)" - mode : "nullable" - - name : "srvcsval170" - type : "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" description: "Services or facilities furnished by gov (170)" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" description: "Public support subtotal (170)" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" description: "Amount support exceeds total (170)" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" description: "Public support (170)" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" description: "Public support from line 4 (170)" - mode : "nullable" - - name : "grsinc170" - type : "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" description: "Gross income from interest etc (170)" - mode : "nullable" - - name : 
"netincunreltd170" - type : "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" description: "Net UBI (170)" - mode : "nullable" - - name : "othrinc170" - type : "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" description: "Other income (170)" - mode : "nullable" - - name : "totsupp170" - type : "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" description: "Total support (170)" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" description: "Gross receipts from related activities (170)" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" description: "Gifts grants membership fees received (509)" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" description: "Receipts from admissions merchandise etc (509)" - mode : "nullable" - - name : "grsrcptsactivities509" - type : "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" description: "Gross receipts from related activities (509)" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" description: "Tax revenues levied (509)" - mode : "nullable" - - name : "srvcsval509" - type : "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" description: "Services or facilities furnished by gov (509)" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" description: "Amounts from disqualified persons (509)" - mode : 
"nullable" - - name : "exceeds1pct509" - type : "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" description: "Amount support exceeds total (509)" - mode : "nullable" - - name : "subtotpub509" - type : "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" description: "Public support (509)" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" description: "Public support from line 6 (509)" - mode : "nullable" - - name : "grsinc509" - type : "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" description: "Gross income from interest etc (509)" - mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" description: "Net UBI (509)" - mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" description: "Subtotal total support (509)" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" description: "Net income from UBI not in 10b (509)" - mode : "nullable" - - name : "othrinc509" - type : "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" description: "Other income (509)" - mode : "nullable" - - name : "totsupp509" - type : "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" description: "Total support (509)" - mode : "nullable" + mode: "nullable" graph_paths: - "irs_990_ez_2016_transform_csv >> load_irs_990_ez_2016_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py 
b/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py index c2d52e4c4..7c7037845 100644 --- a/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py +++ b/datasets/irs_990/irs_990_ez_2017/irs_990_ez_2017_dag.py @@ -48,7 +48,7 @@ "TARGET_GCS_PATH": "data/irs_990/irs_990_ez_2017/data_output.csv", "PIPELINE_NAME": "irs_990_ez_2017", "CSV_HEADERS": '["ein","elf","tax_pd","subseccd","totcntrbs","prgmservrev","duesassesmnts","othrinvstinc","grsamtsalesastothr","basisalesexpnsothr","gnsaleofastothr","grsincgaming","grsrevnuefndrsng","direxpns","netincfndrsng","grsalesminusret","costgoodsold","grsprft","othrevnue","totrevnue","totexpns","totexcessyr","othrchgsnetassetfnd","networthend","totassetsend","totliabend","totnetassetsend","actvtynotprevrptcd","chngsinorgcd","unrelbusincd","filedf990tcd","contractioncd","politicalexpend","filedf1120polcd","loanstoofficerscd","loanstoofficers","initiationfee","grspublicrcpts","s4958excessbenefcd","prohibtdtxshltrcd","nonpfrea","totnooforgscnt","totsupport","gftgrntsrcvd170","txrevnuelevied170","srvcsval170","pubsuppsubtot170","exceeds2pct170","pubsupplesspct170","samepubsuppsubtot170","grsinc170","netincunreltd170","othrinc170","totsupp170","grsrcptsrelated170","totgftgrntrcvd509","grsrcptsadmissn509","grsrcptsactivities509","txrevnuelevied509","srvcsval509","pubsuppsubtot509","rcvdfrmdisqualsub509","exceeds1pct509","subtotpub509","pubsupplesub509","samepubsuppsubtot509","grsinc509","unreltxincls511tx509","subtotsuppinc509","netincunrelatd509","othrinc509","totsupp509"]', - "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": 
"netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfeecerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": "totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": 
"subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', + "RENAME_MAPPINGS": '{"EIN": "ein","a_tax_prd": "tax_pd","taxpd": "tax_pd","taxprd": "tax_pd","subseccd": "subseccd","prgmservrev": "prgmservrev","duesassesmnts": "duesassesmnts","othrinvstinc": "othrinvstinc","grsamtsalesastothr": "grsamtsalesastothr","basisalesexpnsothr": "basisalesexpnsothr","gnsaleofastothr": "gnsaleofastothr","grsincgaming": "grsincgaming","grsrevnuefndrsng": "grsrevnuefndrsng","direxpns": "direxpns","netincfndrsng": "netincfndrsng","grsalesminusret": "grsalesminusret","costgoodsold": "costgoodsold","grsprft": "grsprft","othrevnue": "othrevnue","totrevnue": "totrevnue","totexpns": "totexpns","totexcessyr": "totexcessyr","othrchgsnetassetfnd": "othrchgsnetassetfnd","networthend": "networthend","totassetsend": "totassetsend","totliabend": "totliabend","totnetassetsend": "totnetassetsend","actvtynotprevrptcd": "actvtynotprevrptcd","chngsinorgcd": "chngsinorgcd","unrelbusincd": "unrelbusincd","filedf990tcd": "filedf990tcd","contractioncd": "contractioncd","politicalexpend": "politicalexpend","filedfYYN0polcd": "filedf1120polcd","loanstoofficerscd": "loanstoofficerscd","loanstoofficers": "loanstoofficers","initiationfee": "initiationfee","grspublicrcpts": "grspublicrcpts","s4958excessbenefcd": "s4958excessbenefcd","prohibtdtxshltrcd": "prohibtdtxshltrcd","nonpfrea": "nonpfrea","totnoforgscnt": "totnooforgscnt","totsupport": "totsupport","gftgrntrcvd170": "gftgrntsrcvd170","txrevnuelevied170": "txrevnuelevied170","srvcsval170": "srvcsval170","pubsuppsubtot170": "pubsuppsubtot170","excds2pct170": "exceeds2pct170","pubsupplesspct170": "pubsupplesspct170","samepubsuppsubtot170": "samepubsuppsubtot170","grsinc170": "grsinc170","netincunrelatd170": "netincunreltd170","othrinc170": "othrinc170","totsupport170": "totsupp170","grsrcptsrelatd170": "grsrcptsrelated170","totgftgrntrcvd509": 
"totgftgrntrcvd509","grsrcptsadmiss509": "grsrcptsadmissn509","grsrcptsactvts509": "grsrcptsactivities509","txrevnuelevied509": "txrevnuelevied509","srvcsval509": "srvcsval509","pubsuppsubtot509": "pubsuppsubtot509","rcvdfrmdisqualsub509": "rcvdfrmdisqualsub509","excds1pct509": "exceeds1pct509","subtotpub509": "subtotpub509","pubsupplesssub509": "pubsupplesub509","samepubsuppsubtot509": "samepubsuppsubtot509","grsinc509": "grsinc509","unreltxincls511tx509": "unreltxincls511tx509","subtotsuppinc509": "subtotsuppinc509","netincunreltd509": "netincunrelatd509","othrinc509": "othrinc509","totsupp509": "totsupp509","elf": "elf","totcntrbs": "totcntrbs"}', }, resources={"request_memory": "4G", "request_cpu": "1"}, ) diff --git a/datasets/irs_990/irs_990_ez_2017/pipeline.yaml b/datasets/irs_990/irs_990_ez_2017/pipeline.yaml index 5179d8efe..7cb823482 100644 --- a/datasets/irs_990/irs_990_ez_2017/pipeline.yaml +++ b/datasets/irs_990/irs_990_ez_2017/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_ez_2017 # Description of the table - description: "irs_990_ez_2017 dataset" + description: "IRS 990 EZ 2017 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -107,300 +107,295 @@ dag: # i.e. specify modes for all columns. 
schema_fields: - - name : "ein" - type : "string" + - name: "ein" + type: "string" description: "Employer Identification Number" - mode : "required" - - name : "elf" - type : "string" + mode: "required" + - name: "elf" + type: "string" description: "E-file indicator" - mode : "nullable" - - name : "tax_pd" - type : "integer" + mode: "nullable" + - name: "tax_pd" + type: "integer" description: "Tax period" - mode : "nullable" - - name : "subseccd" - type : "integer" + mode: "nullable" + - name: "subseccd" + type: "integer" description: "Subsection code" - mode : "nullable" - - name : "totcntrbs" - type : "integer" + mode: "nullable" + - name: "totcntrbs" + type: "integer" description: "Contributions gifts grants etc received" - mode : "nullable" - - name : "prgmservrev" - type : "integer" + mode: "nullable" + - name: "prgmservrev" + type: "integer" description: "Program service revenue" - mode : "nullable" - - name : "duesassesmnts" - type : "integer" + mode: "nullable" + - name: "duesassesmnts" + type: "integer" description: "Membership dues and assessments" - mode : "nullable" - - name : "othrinvstinc" - type : "integer" + mode: "nullable" + - name: "othrinvstinc" + type: "integer" description: "Investment income" - mode : "nullable" - - name : "grsamtsalesastothr" - type : "integer" + mode: "nullable" + - name: "grsamtsalesastothr" + type: "integer" description: "Gross amount from sale of assets" - mode : "nullable" - - name : "basisalesexpnsothr" - type : "integer" + mode: "nullable" + - name: "basisalesexpnsothr" + type: "integer" description: "Cost or other basis and sales expenses" - mode : "nullable" - - name : "gnsaleofastothr" - type : "integer" + mode: "nullable" + - name: "gnsaleofastothr" + type: "integer" description: "Gain or (loss) from sale of assets" - mode : "nullable" - - name : "grsincgaming" - type : "integer" + mode: "nullable" + - name: "grsincgaming" + type: "integer" description: "Gross income from gaming" - mode : "nullable" - - name : 
"grsrevnuefndrsng" - type : "integer" + mode: "nullable" + - name: "grsrevnuefndrsng" + type: "integer" description: "Special events gross revenue" - mode : "nullable" - - name : "direxpns" - type : "integer" + mode: "nullable" + - name: "direxpns" + type: "integer" description: "Special events direct expenses" - mode : "nullable" - - name : "netincfndrsng" - type : "integer" + mode: "nullable" + - name: "netincfndrsng" + type: "integer" description: "Special events net income (or loss)" - mode : "nullable" - - name : "grsalesminusret" - type : "integer" + mode: "nullable" + - name: "grsalesminusret" + type: "integer" description: "Gross sales of inventory" - mode : "nullable" - - name : "costgoodsold" - type : "integer" + mode: "nullable" + - name: "costgoodsold" + type: "integer" description: "Less: cost of goods sold" - mode : "nullable" - - name : "grsprft" - type : "integer" + mode: "nullable" + - name: "grsprft" + type: "integer" description: "Gross profit (or loss) from sales of inventory" - mode : "nullable" - - name : "othrevnue" - type : "integer" + mode: "nullable" + - name: "othrevnue" + type: "integer" description: "Other revenue - total" - mode : "nullable" - - name : "totrevnue" - type : "integer" + mode: "nullable" + - name: "totrevnue" + type: "integer" description: "Total revenue" - mode : "nullable" - - name : "totexpns" - type : "integer" + mode: "nullable" + - name: "totexpns" + type: "integer" description: "Total expenses" - mode : "nullable" - - name : "totexcessyr" - type : "integer" + mode: "nullable" + - name: "totexcessyr" + type: "integer" description: "Excess or deficit" - mode : "nullable" - - name : "othrchgsnetassetfnd" - type : "integer" + mode: "nullable" + - name: "othrchgsnetassetfnd" + type: "integer" description: "Other changes in net assets" - mode : "nullable" - - name : "networthend" - type : "integer" + mode: "nullable" + - name: "networthend" + type: "integer" description: "Net assets EOY" - mode : "nullable" - - name : 
"totassetsend" - type : "integer" + mode: "nullable" + - name: "totassetsend" + type: "integer" description: "Total assets e-o-y" - mode : "nullable" - - name : "totliabend" - type : "integer" + mode: "nullable" + - name: "totliabend" + type: "integer" description: "Total liabilities e-o-y" - mode : "nullable" - - name : "totnetassetsend" - type : "integer" + mode: "nullable" + - name: "totnetassetsend" + type: "integer" description: "Total net worth e-o-y" - mode : "nullable" - - name : "actvtynotprevrptcd" - type : "string" + mode: "nullable" + - name: "actvtynotprevrptcd" + type: "string" description: "Activity not previously reported?" - mode : "nullable" - - name : "chngsinorgcd" - type : "string" + mode: "nullable" + - name: "chngsinorgcd" + type: "string" description: "Significant changes to governing docs?" - mode : "nullable" - - name : "unrelbusincd" - type : "string" + mode: "nullable" + - name: "unrelbusincd" + type: "string" description: "UBI over $1000?" - mode : "nullable" - - name : "filedf990tcd" - type : "string" + mode: "nullable" + - name: "filedf990tcd" + type: "string" description: "Organization Filed 990T" - mode : "nullable" - - name : "contractioncd" - type : "string" + mode: "nullable" + - name: "contractioncd" + type: "string" description: "Liquidation dissolution termination or contraction" - mode : "nullable" - - name : "politicalexpend" - type : "integer" + mode: "nullable" + - name: "politicalexpend" + type: "integer" description: "Direct or indirect political expenditures" - mode : "nullable" - - name : "filedf1120polcd" - type : "string" + mode: "nullable" + - name: "filedf1120polcd" + type: "string" description: "File Form 1120-POL?" - mode : "nullable" - - name : "loanstoofficerscd" - type : "string" + mode: "nullable" + - name: "loanstoofficerscd" + type: "string" description: "Loans to/from officers directors or trustees?" 
- mode : "nullable" - - name : "loanstoofficers" - type : "integer" + mode: "nullable" + - name: "loanstoofficers" + type: "integer" description: "Amount of loans to/from officers" - mode : "nullable" - - name : "initiationfee" - type : "integer" + mode: "nullable" + - name: "initiationfee" + type: "integer" description: "Initiation fees and capital contributions" - mode : "nullable" - - name : "grspublicrcpts" - type : "integer" + mode: "nullable" + - name: "grspublicrcpts" + type: "integer" description: "Gross receipts for public use of club facilities" - mode : "nullable" - - name : "s4958excessbenefcd" - type : "string" + mode: "nullable" + - name: "s4958excessbenefcd" + type: "string" description: "Section 4958 excess benefit transactions?" - mode : "nullable" - - name : "prohibtdtxshltrcd" - type : "string" + mode: "nullable" + - name: "prohibtdtxshltrcd" + type: "string" description: "Party to a prohibited tax shelter transaction?" - mode : "nullable" - - name : "nonpfrea" - type : "integer" + mode: "nullable" + - name: "nonpfrea" + type: "integer" description: "Reason for non-PF status" - mode : "nullable" - - name : "totnooforgscnt" - type : "integer" + mode: "nullable" + - name: "totnooforgscnt" + type: "integer" description: "Number of organizations supported" - mode : "nullable" - - name : "totsupport" - type : "integer" + mode: "nullable" + - name: "totsupport" + type: "integer" description: "Sum of amounts of support" - mode : "nullable" - - name : "gftgrntsrcvd170" - type : "integer" + mode: "nullable" + - name: "gftgrntsrcvd170" + type: "integer" description: "Gifts grants membership fees received (170)" - mode : "nullable" - - name : "txrevnuelevied170" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied170" + type: "integer" description: "Tax revenues levied (170)" - mode : "nullable" - - name : "srvcsval170" - type : "integer" + mode: "nullable" + - name: "srvcsval170" + type: "integer" description: "Services or facilities furnished 
by gov (170)" - mode : "nullable" - - name : "pubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot170" + type: "integer" description: "Public support subtotal (170)" - mode : "nullable" - - name : "exceeds2pct170" - type : "integer" + mode: "nullable" + - name: "exceeds2pct170" + type: "integer" description: "Amount support exceeds total (170)" - mode : "nullable" - - name : "pubsupplesspct170" - type : "integer" + mode: "nullable" + - name: "pubsupplesspct170" + type: "integer" description: "Public support (170)" - mode : "nullable" - - name : "samepubsuppsubtot170" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot170" + type: "integer" description: "Public support from line 4 (170)" - mode : "nullable" - - name : "grsinc170" - type : "integer" + mode: "nullable" + - name: "grsinc170" + type: "integer" description: "Gross income from interest etc (170)" - mode : "nullable" - - name : "netincunreltd170" - type : "integer" + mode: "nullable" + - name: "netincunreltd170" + type: "integer" description: "Net UBI (170)" - mode : "nullable" - - name : "othrinc170" - type : "integer" + mode: "nullable" + - name: "othrinc170" + type: "integer" description: "Other income (170)" - mode : "nullable" - - name : "totsupp170" - type : "integer" + mode: "nullable" + - name: "totsupp170" + type: "integer" description: "Total support (170)" - mode : "nullable" - - name : "grsrcptsrelated170" - type : "integer" + mode: "nullable" + - name: "grsrcptsrelated170" + type: "integer" description: "Gross receipts from related activities (170)" - mode : "nullable" - - name : "totgftgrntrcvd509" - type : "integer" + mode: "nullable" + - name: "totgftgrntrcvd509" + type: "integer" description: "Gifts grants membership fees received (509)" - mode : "nullable" - - name : "grsrcptsadmissn509" - type : "integer" + mode: "nullable" + - name: "grsrcptsadmissn509" + type: "integer" description: "Receipts from admissions merchandise etc (509)" - mode : 
"nullable" - - name : "grsrcptsactivities509" - type : "integer" + mode: "nullable" + - name: "grsrcptsactivities509" + type: "integer" description: "Gross receipts from related activities (509)" - mode : "nullable" - - name : "txrevnuelevied509" - type : "integer" + mode: "nullable" + - name: "txrevnuelevied509" + type: "integer" description: "Tax revenues levied (509)" - mode : "nullable" - - name : "srvcsval509" - type : "integer" + mode: "nullable" + - name: "srvcsval509" + type: "integer" description: "Services or facilities furnished by gov (509)" - mode : "nullable" - - name : "pubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "pubsuppsubtot509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "rcvdfrmdisqualsub509" - type : "integer" + mode: "nullable" + - name: "rcvdfrmdisqualsub509" + type: "integer" description: "Amounts from disqualified persons (509)" - mode : "nullable" - - name : "exceeds1pct509" - type : "integer" + mode: "nullable" + - name: "exceeds1pct509" + type: "integer" description: "Amount support exceeds total (509)" - mode : "nullable" - - name : "subtotpub509" - type : "integer" + mode: "nullable" + - name: "subtotpub509" + type: "integer" description: "Public support subtotal (509)" - mode : "nullable" - - name : "pubsupplesub509" - type : "integer" + mode: "nullable" + - name: "pubsupplesub509" + type: "integer" description: "Public support (509)" - mode : "nullable" - - name : "samepubsuppsubtot509" - type : "integer" + mode: "nullable" + - name: "samepubsuppsubtot509" + type: "integer" description: "Public support from line 6 (509)" - mode : "nullable" - - name : "grsinc509" - type : "integer" + mode: "nullable" + - name: "grsinc509" + type: "integer" description: "Gross income from interest etc (509)" - mode : "nullable" - - name : "unreltxincls511tx509" - type : "integer" + mode: "nullable" + - name: "unreltxincls511tx509" + type: "integer" description: "Net UBI (509)" - 
mode : "nullable" - - name : "subtotsuppinc509" - type : "integer" + mode: "nullable" + - name: "subtotsuppinc509" + type: "integer" description: "Subtotal total support (509)" - mode : "nullable" - - name : "netincunrelatd509" - type : "integer" + mode: "nullable" + - name: "netincunrelatd509" + type: "integer" description: "Net income from UBI not in 10b (509)" - mode : "nullable" - - name : "othrinc509" - type : "integer" + mode: "nullable" + - name: "othrinc509" + type: "integer" description: "Other income (509)" - mode : "nullable" - - name : "totsupp509" - type : "integer" + mode: "nullable" + - name: "totsupp509" + type: "integer" description: "Total support (509)" - mode : "nullable" + mode: "nullable" graph_paths: - "irs_990_ez_2017_transform_csv >> load_irs_990_ez_2017_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py b/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py index 7660d0d46..189cd7382 100644 --- a/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py +++ b/datasets/irs_990/irs_990_pf_2014/irs_990_pf_2014_dag.py @@ -39,7 +39,7 @@ name="irs_990_pf_2014", namespace="default", image_pull_policy="Always", - image="{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", env_vars={ "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/14eofinextract990pf.zip", "SOURCE_FILE": "files/data.zip", diff --git a/datasets/irs_990/irs_990_pf_2014/pipeline.yaml b/datasets/irs_990/irs_990_pf_2014/pipeline.yaml index 6c477b085..a40eb889d 100644 --- a/datasets/irs_990/irs_990_pf_2014/pipeline.yaml +++ b/datasets/irs_990/irs_990_pf_2014/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_pf_2014 # Description of the table - description: "irs_990_pf_2014 dataset" + description: "IRS 990 PF 2014 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' 
max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -58,7 +58,7 @@ dag: image_pull_policy: "Always" # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. - image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}" + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. env_vars: @@ -105,724 +105,719 @@ dag: # i.e. specify modes for all columns. schema_fields: - - name : "ein" - type : "string" - description : "Employer Identification Number" - mode : "required" - - name : "tax_prd" - type : "string" - description : "Tax period (YYYYMM format)" - mode : "nullable" - - name : "eostatus" - type : "string" - description : "EO Status Code" - mode : "nullable" - - name : "tax_yr" - type : "integer" - description : "SOI Year" - mode : "nullable" - - name : "operatingcd" - type : "string" - description : "Operating foundation code" - mode : "nullable" - - name : "subcd" - type : "string" - description : "Subsection code" - mode : "nullable" - - name : "fairmrktvalamt" - type : "integer" - description : "Total assets – e-o-y fair market valu" - mode : "nullable" - - name : "grscontrgifts" - type : "integer" - description : "Contributions received" - mode : "nullable" - - name : "schedbind" - type : "string" - description : "Schedule B indicator" - mode : "nullable" - - name : "intrstrvnue" - type : "integer" - description : "Interest revenue" - mode : "nullable" - - name : "dividndsamt" - type : "integer" - description : "" - mode : "nullable" - - name : "grsrents" - type : "integer" - description : "Gross rents" - mode : "nullable" - - name : "grsslspramt" - type : "integer" - description : 
"Gross sales price for assets" - mode : "nullable" - - name : "costsold" - type : "integer" - description : "Cost-of-goods-sold" - mode : "nullable" - - name : "grsprofitbus" - type : "integer" - description : "Gross profit" - mode : "nullable" - - name : "otherincamt" - type : "integer" - description : "Other income" - mode : "nullable" - - name : "totrcptperbks" - type : "integer" - description : "Total revenue" - mode : "nullable" - - name : "compofficers" - type : "integer" - description : "Compensation of officers" - mode : "nullable" - - name : "pensplemplbenf" - type : "integer" - description : "Pension plans employee benefits" - mode : "nullable" - - name : "legalfeesamt" - type : "integer" - description : "Legal fees" - mode : "nullable" - - name : "accountingfees" - type : "integer" - description : "Accounting fees" - mode : "nullable" - - name : "interestamt" - type : "integer" - description : "Interest" - mode : "nullable" - - name : "depreciationamt" - type : "integer" - description : "Depreciation and depletion" - mode : "nullable" - - name : "occupancyamt" - type : "integer" - description : "Occupancy" - mode : "nullable" - - name : "travlconfmtngs" - type : "integer" - description : "Travel conferences and meetings" - mode : "nullable" - - name : "printingpubl" - type : "integer" - description : "Printing and publications" - mode : "nullable" - - name : "topradmnexpnsa" - type : "integer" - description : "Total operating and administrative expenses column a" - mode : "nullable" - - name : "contrpdpbks" - type : "integer" - description : "Contributions gifts grants paid" - mode : "nullable" - - name : "totexpnspbks" - type : "integer" - description : "Total expenses" - mode : "nullable" - - name : "excessrcpts" - type : "integer" - description : "Net income less deficit" - mode : "nullable" - - name : "totrcptnetinc" - type : "integer" - description : "Total receipts net investment income" - mode : "nullable" - - name : "topradmnexpnsb" - type : 
"integer" - description : "Total operating and administrative expenses column b" - mode : "nullable" - - name : "totexpnsnetinc" - type : "integer" - description : "Total expenses net investment income" - mode : "nullable" - - name : "netinvstinc" - type : "integer" - description : "Net investment income" - mode : "nullable" - - name : "trcptadjnetinc" - type : "integer" - description : "Total receipts adjusted net income" - mode : "nullable" - - name : "totexpnsadjnet" - type : "integer" - description : "Total expenses adjusted net income" - mode : "nullable" - - name : "adjnetinc" - type : "integer" - description : "Adjusted net income" - mode : "nullable" - - name : "topradmnexpnsd" - type : "integer" - description : "Total operating and administrative expenses column d" - mode : "nullable" - - name : "totexpnsexempt" - type : "integer" - description : "Total expenses – exempt purpose" - mode : "nullable" - - name : "othrcashamt" - type : "integer" - description : "Cash non-interest-bearing – e-o-y book value" - mode : "nullable" - - name : "invstgovtoblig" - type : "integer" - description : "Investments in U.S. 
& state government obligations – e-o-y book value" - mode : "nullable" - - name : "invstcorpstk" - type : "integer" - description : "Investments in corporate stock – e-o-y book value" - mode : "nullable" - - name : "invstcorpbnd" - type : "integer" - description : "Investments in corporate bonds– e-o-y book value" - mode : "nullable" - - name : "totinvstsec" - type : "integer" - description : "Total investments in securities – e-o-y book value" - mode : "nullable" - - name : "mrtgloans" - type : "integer" - description : "Investments mortgage loans – e-o-y book value" - mode : "nullable" - - name : "othrinvstend" - type : "integer" - description : "Other investments – e-o-y book value" - mode : "nullable" - - name : "othrassetseoy" - type : "integer" - description : "Other assets – e-o-y book value" - mode : "nullable" - - name : "totassetsend" - type : "integer" - description : "Total assets – e-o-y book value" - mode : "nullable" - - name : "mrtgnotespay" - type : "integer" - description : "Mortgage loans payable – e-o-y book value" - mode : "nullable" - - name : "othrliabltseoy" - type : "integer" - description : "Other liabilities – e-o-y book value" - mode : "nullable" - - name : "totliabend" - type : "integer" - description : "Total liabilities – e-o-y book value" - mode : "nullable" - - name : "tfundnworth" - type : "integer" - description : "Total fund net worth – e-o-y book value" - mode : "nullable" - - name : "fairmrktvaleoy" - type : "integer" - description : "Total assets – e-o-y fair market value" - mode : "nullable" - - name : "totexcapgnls" - type : "integer" - description : "Capital gain net income" - mode : "nullable" - - name : "totexcapgn" - type : "integer" - description : "Net gain – sales of assets" - mode : "nullable" - - name : "totexcapls" - type : "integer" - description : "Net loss – sales of assets" - mode : "nullable" - - name : "invstexcisetx" - type : "integer" - description : "Excise tax on net investment income" - mode : "nullable" 
- - name : "sec4940notxcd" - type : "string" - description : "Section 4940 – no tax" - mode : "nullable" - - name : "sec4940redtxcd" - type : "string" - description : "Section 4940 – 1 % tax" - mode : "nullable" - - name : "sect511tx" - type : "integer" - description : "Section 511 tax" - mode : "nullable" - - name : "subtitleatx" - type : "integer" - description : "Subtitle A tax" - mode : "nullable" - - name : "totaxpyr" - type : "integer" - description : "Total excise tax" - mode : "nullable" - - name : "esttaxcr" - type : "integer" - description : "Estimated tax credit" - mode : "nullable" - - name : "txwithldsrc" - type : "integer" - description : "Tax withheld at source" - mode : "nullable" - - name : "txpaidf2758" - type : "integer" - description : "Tax paid with Form 2758 (filing extension)" - mode : "nullable" - - name : "erronbkupwthld" - type : "integer" - description : "Erroneous backup withholding credit amount" - mode : "nullable" - - name : "estpnlty" - type : "integer" - description : "Estimated tax penalty" - mode : "nullable" - - name : "taxdue" - type : "integer" - description : "Tax due" - mode : "nullable" - - name : "overpay" - type : "integer" - description : "Overpayment" - mode : "nullable" - - name : "crelamt" - type : "integer" - description : "Credit elect amount" - mode : "nullable" - - name : "infleg" - type : "string" - description : "Influence legislation?" - mode : "nullable" - - name : "actnotpr" - type : "string" - description : "Activities not previously reported?" - mode : "nullable" - - name : "chgnprvrptcd" - type : "string" - description : "Changes not previously reported?" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - description : "Filed 990-T?" - mode : "nullable" - - name : "contractncd" - type : "string" - description : "Contraction?" - mode : "nullable" - - name : "furnishcpycd" - type : "string" - description : "Furnished copy to Attorney General?" 
- mode : "nullable" - - name : "claimstatcd" - type : "string" - description : "Claiming status?" - mode : "nullable" - - name : "cntrbtrstxyrcd" - type : "string" - description : "Substantial contributors?" - mode : "nullable" - - name : "acqdrindrintcd" - type : "string" - description : "Distribution to donor advised fund with advisory privileges?" - mode : "nullable" - - name : "orgcmplypubcd" - type : "string" - description : "Comply with public inspection?" - mode : "nullable" - - name : "filedlf1041ind" - type : "string" - description : "Comply with public inspection?" - mode : "nullable" - - name : "propexchcd" - type : "string" - description : "Property exchange?" - mode : "nullable" - - name : "brwlndmnycd" - type : "string" - description : "Borrow lend money?" - mode : "nullable" - - name : "furngoodscd" - type : "string" - description : "Furnished goods?" - mode : "nullable" - - name : "paidcmpncd" - type : "string" - description : "Paid compensation?" - mode : "nullable" - - name : "transfercd" - type : "string" - description : "Transfer?" - mode : "nullable" - - name : "agremkpaycd" - type : "string" - description : "Agree to make pay?" - mode : "nullable" - - name : "exceptactsind" - type : "string" - description : "Acts fail to qualify under section 53.4941(d)-3?" - mode : "nullable" - - name : "prioractvcd" - type : "string" - description : "Engage in acts in prior year?" - mode : "nullable" - - name : "undistrinccd" - type : "string" - description : "Undistributed income?" - mode : "nullable" - - name : "applyprovind" - type : "string" - description : "Not applying section 4942(a)(2) provisions?" - mode : "nullable" - - name : "dirindirintcd" - type : "string" - description : "Direct indirect interest?" - mode : "nullable" - - name : "excesshldcd" - type : "string" - description : "Excess business holdings?" - mode : "nullable" - - name : "invstjexmptcd" - type : "string" - description : "Jeopardizing investments?" 
- mode : "nullable" - - name : "prevjexmptcd" - type : "string" - description : "Prior year jeopardizing investments?" - mode : "nullable" - - name : "propgndacd" - type : "string" - description : "Propaganda?" - mode : "nullable" - - name : "ipubelectcd" - type : "string" - description : "Influence public election?" - mode : "nullable" - - name : "grntindivcd" - type : "string" - description : "Grant individual?" - mode : "nullable" - - name : "nchrtygrntcd" - type : "string" - description : "Non-charity grant?" - mode : "nullable" - - name : "nreligiouscd" - type : "string" - description : "Non-religious?" - mode : "nullable" - - name : "excptransind" - type : "string" - description : "Transactions fail to qualify under section 53.4945?" - mode : "nullable" - - name : "rfprsnlbnftind" - type : "string" - description : "Receive funds to pay premiums on personal benefit contract?" - mode : "nullable" - - name : "pyprsnlbnftind" - type : "string" - description : "Pay premiums on personal benefit contract?" 
- mode : "nullable" - - name : "tfairmrktunuse" - type : "integer" - description : "Fair market value of assets not used for charitable purposes" - mode : "nullable" - - name : "valncharitassets" - type : "integer" - description : "Net value of noncharitable-use assets" - mode : "nullable" - - name : "cmpmininvstret" - type : "integer" - description : "Minimum investment return" - mode : "nullable" - - name : "distribamt" - type : "integer" - description : "Distributable amount" - mode : "nullable" - - name : "undistribincyr" - type : "integer" - description : "Undistributed income" - mode : "nullable" - - name : "adjnetinccola" - type : "integer" - description : "Adjusted net income column a" - mode : "nullable" - - name : "adjnetinccolb" - type : "integer" - description : "Adjusted net income column b" - mode : "nullable" - - name : "adjnetinccolc" - type : "integer" - description : "Adjusted net income column c" - mode : "nullable" - - name : "adjnetinccold" - type : "integer" - description : "Adjusted net income column d" - mode : "nullable" - - name : "adjnetinctot" - type : "integer" - description : "Adjusted net income total" - mode : "nullable" - - name : "qlfydistriba" - type : "integer" - description : "Qualifying distributions column a" - mode : "nullable" - - name : "qlfydistribb" - type : "integer" - description : "Qualifying distributions column b" - mode : "nullable" - - name : "qlfydistribc" - type : "integer" - description : "Qualifying distributions column c" - mode : "nullable" - - name : "qlfydistribd" - type : "integer" - description : "Qualifying distributions column d" - mode : "nullable" - - name : "qlfydistribtot" - type : "integer" - description : "Qualifying distributions total" - mode : "nullable" - - name : "valassetscola" - type : "integer" - description : "Value assets column a" - mode : "nullable" - - name : "valassetscolb" - type : "integer" - description : "Value assets column b" - mode : "nullable" - - name : "valassetscolc" - 
type : "integer" - description : "Value assets column c" - mode : "nullable" - - name : "valassetscold" - type : "integer" - description : "Value assets column d" - mode : "nullable" - - name : "valassetstot" - type : "integer" - description : "Value assets total" - mode : "nullable" - - name : "qlfyasseta" - type : "integer" - description : "Qualifying assets column a" - mode : "nullable" - - name : "qlfyassetb" - type : "integer" - description : "Qualifying assets column b" - mode : "nullable" - - name : "qlfyassetc" - type : "integer" - description : "Qualifying assets column c" - mode : "nullable" - - name : "qlfyassetd" - type : "integer" - description : "Qualifying assets column d" - mode : "nullable" - - name : "qlfyassettot" - type : "integer" - description : "Qualifying assets total" - mode : "nullable" - - name : "endwmntscola" - type : "integer" - description : "Endowments column a" - mode : "nullable" - - name : "endwmntscolb" - type : "integer" - description : "Endowments column b" - mode : "nullable" - - name : "endwmntscolc" - type : "integer" - description : "Endowments column c" - mode : "nullable" - - name : "endwmntscold" - type : "integer" - description : "Endowments column d" - mode : "nullable" - - name : "endwmntstot" - type : "integer" - description : "Endowments total" - mode : "nullable" - - name : "totsuprtcola" - type : "integer" - description : "Total support column a" - mode : "nullable" - - name : "totsuprtcolb" - type : "integer" - description : "Total support column b" - mode : "nullable" - - name : "totsuprtcolc" - type : "integer" - description : "Total support column c" - mode : "nullable" - - name : "totsuprtcold" - type : "integer" - description : "Total support column d" - mode : "nullable" - - name : "totsuprttot" - type : "integer" - description : "Total support total" - mode : "nullable" - - name : "pubsuprtcola" - type : "integer" - description : "Public support column a" - mode : "nullable" - - name : "pubsuprtcolb" - 
type : "integer" - description : "Public support column b" - mode : "nullable" - - name : "pubsuprtcolc" - type : "integer" - description : "Public support column c" - mode : "nullable" - - name : "pubsuprtcold" - type : "integer" - description : "Public support column d" - mode : "nullable" - - name : "pubsuprttot" - type : "integer" - description : "Public support total" - mode : "nullable" - - name : "grsinvstinca" - type : "integer" - description : "Gross investment income column a" - mode : "nullable" - - name : "grsinvstincb" - type : "integer" - description : "Gross investment income column b" - mode : "nullable" - - name : "grsinvstincc" - type : "integer" - description : "Gross investment income column c" - mode : "nullable" - - name : "grsinvstincd" - type : "integer" - description : "Gross investment income column d" - mode : "nullable" - - name : "grsinvstinctot" - type : "integer" - description : "Gross investment income total" - mode : "nullable" - - name : "grntapprvfut" - type : "integer" - description : "Grants approved for future payment" - mode : "nullable" - - name : "progsrvcacold" - type : "integer" - description : "Program service revenue line 1a (excluded)" - mode : "nullable" - - name : "progsrvcacole" - type : "integer" - description : "Program service revenue line 1a (exempt)" - mode : "nullable" - - name : "progsrvcbcold" - type : "integer" - description : "Program service revenue line 1b (excluded)" - mode : "nullable" - - name : "progsrvcbcole" - type : "integer" - description : "Program service revenue line 1b (exempt)" - mode : "nullable" - - name : "progsrvcccold" - type : "integer" - description : "Program service revenue line 1c (excluded)" - mode : "nullable" - - name : "progsrvcccole" - type : "integer" - description : "Program service revenue line 1c (exempt)" - mode : "nullable" - - name : "progsrvcdcold" - type : "integer" - description : "Program service revenue line 1d (excluded)" - mode : "nullable" - - name : 
"progsrvcdcole" - type : "integer" - description : "Program service revenue line 1d (exempt)" - mode : "nullable" - - name : "progsrvcecold" - type : "integer" - description : "Program service revenue line 1e (excluded)" - mode : "nullable" - - name : "progsrvcecole" - type : "integer" - description : "Program service revenue line 1e (exempt)" - mode : "nullable" - - name : "progsrvcfcold" - type : "integer" - description : "Program service revenue line 1f (excluded)" - mode : "nullable" - - name : "progsrvcfcole" - type : "integer" - description : "Program service revenue line 1f (exempt)" - mode : "nullable" - - name : "progsrvcgcold" - type : "integer" - description : "Program service revenue--fees and contracts from government line 1g (excluded)" - mode : "nullable" - - name : "progsrvcgcole" - type : "integer" - description : "Program service revenue--fees and contracts from government line 1g (exempt)" - mode : "nullable" - - name : "membershpduesd" - type : "integer" - description : "Membership dues and assessments (excluded)" - mode : "nullable" - - name : "membershpduese" - type : "integer" - description : "Membership dues and assessments (exempt)" - mode : "nullable" - - name : "intonsvngsd" - type : "integer" - description : "Interest on savings and temporary cash investments (excluded)" - mode : "nullable" - - name : "intonsvngse" - type : "integer" - description : "Interest on savings and temporary cash investments (exempt)" - mode : "nullable" - - name : "dvdndsintd" - type : "integer" - description : "Dividends and interest from securities (excluded)" - mode : "nullable" - - name : "dvdndsinte" - type : "integer" - description : "Dividends and interest from securities (exempt)" - mode : "nullable" - - name : "trnsfrcashcd" - type : "string" - description : "Transfer cash to noncharitable exempt organization?" - mode : "nullable" - - name : "trnsothasstscd" - type : "string" - description : "Transfer other assets to noncharitable exempt organization?" 
- mode : "nullable" - - name : "salesasstscd" - type : "string" - description : "Sale of assets to noncharitable exempt organization?" - mode : "nullable" - - name : "prchsasstscd" - type : "string" - description : "Purchase of assets from noncharitable exempt organization?" - mode : "nullable" - - name : "rentlsfacltscd" - type : "string" - description : "Rental of facilities or other assets?" - mode : "nullable" - - name : "reimbrsmntscd" - type : "string" - description : "Reimbursements arrangements?" - mode : "nullable" - - name : "loansguarcd" - type : "string" - description : "Loans or other guarantees?" - mode : "nullable" - - name : "perfservicescd" - type : "string" - description : "Performance of services or membership or fundraising solicitations?" - mode : "nullable" - - name : "sharngasstscd" - type : "string" - description : "Sharing of facilities equipment mailing lists other assets or paid employees?" - mode : "nullable" + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "tax_prd" + type: "string" + description: "Tax period (YYYYMM format)" + mode: "nullable" + - name: "eostatus" + type: "string" + description: "EO Status Code" + mode: "nullable" + - name: "tax_yr" + type: "integer" + description: "SOI Year" + mode: "nullable" + - name: "operatingcd" + type: "string" + description: "Operating foundation code" + mode: "nullable" + - name: "subcd" + type: "string" + description: "Subsection code" + mode: "nullable" + - name: "fairmrktvalamt" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "grscontrgifts" + type: "integer" + description: "Contributions received" + mode: "nullable" + - name: "schedbind" + type: "string" + description: "Schedule B indicator" + mode: "nullable" + - name: "intrstrvnue" + type: "integer" + description: "Interest revenue" + mode: "nullable" + - name: "dividndsamt" + type: "integer" + description: "" + mode: 
"nullable" + - name: "grsrents" + type: "integer" + description: "Gross rents" + mode: "nullable" + - name: "grsslspramt" + type: "integer" + description: "Gross sales price for assets" + mode: "nullable" + - name: "costsold" + type: "integer" + description: "Cost-of-goods-sold" + mode: "nullable" + - name: "grsprofitbus" + type: "integer" + description: "Gross profit" + mode: "nullable" + - name: "otherincamt" + type: "integer" + description: "Other income" + mode: "nullable" + - name: "totrcptperbks" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "compofficers" + type: "integer" + description: "Compensation of officers" + mode: "nullable" + - name: "pensplemplbenf" + type: "integer" + description: "Pension plans employee benefits" + mode: "nullable" + - name: "legalfeesamt" + type: "integer" + description: "Legal fees" + mode: "nullable" + - name: "accountingfees" + type: "integer" + description: "Accounting fees" + mode: "nullable" + - name: "interestamt" + type: "integer" + description: "Interest" + mode: "nullable" + - name: "depreciationamt" + type: "integer" + description: "Depreciation and depletion" + mode: "nullable" + - name: "occupancyamt" + type: "integer" + description: "Occupancy" + mode: "nullable" + - name: "travlconfmtngs" + type: "integer" + description: "Travel conferences and meetings" + mode: "nullable" + - name: "printingpubl" + type: "integer" + description: "Printing and publications" + mode: "nullable" + - name: "topradmnexpnsa" + type: "integer" + description: "Total operating and administrative expenses column a" + mode: "nullable" + - name: "contrpdpbks" + type: "integer" + description: "Contributions gifts grants paid" + mode: "nullable" + - name: "totexpnspbks" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "excessrcpts" + type: "integer" + description: "Net income less deficit" + mode: "nullable" + - name: "totrcptnetinc" + type: "integer" + description: "Total receipts 
net investment income" + mode: "nullable" + - name: "topradmnexpnsb" + type: "integer" + description: "Total operating and administrative expenses column b" + mode: "nullable" + - name: "totexpnsnetinc" + type: "integer" + description: "Total expenses net investment income" + mode: "nullable" + - name: "netinvstinc" + type: "integer" + description: "Net investment income" + mode: "nullable" + - name: "trcptadjnetinc" + type: "integer" + description: "Total receipts adjusted net income" + mode: "nullable" + - name: "totexpnsadjnet" + type: "integer" + description: "Total expenses adjusted net income" + mode: "nullable" + - name: "adjnetinc" + type: "integer" + description: "Adjusted net income" + mode: "nullable" + - name: "topradmnexpnsd" + type: "integer" + description: "Total operating and administrative expenses column d" + mode: "nullable" + - name: "totexpnsexempt" + type: "integer" + description: "Total expenses – exempt purpose" + mode: "nullable" + - name: "othrcashamt" + type: "integer" + description: "Cash non-interest-bearing – e-o-y book value" + mode: "nullable" + - name: "invstgovtoblig" + type: "integer" + description: "Investments in U.S. 
& state government obligations – e-o-y book value" + mode: "nullable" + - name: "invstcorpstk" + type: "integer" + description: "Investments in corporate stock – e-o-y book value" + mode: "nullable" + - name: "invstcorpbnd" + type: "integer" + description: "Investments in corporate bonds – e-o-y book value" + mode: "nullable" + - name: "totinvstsec" + type: "integer" + description: "Total investments in securities – e-o-y book value" + mode: "nullable" + - name: "mrtgloans" + type: "integer" + description: "Investments mortgage loans – e-o-y book value" + mode: "nullable" + - name: "othrinvstend" + type: "integer" + description: "Other investments – e-o-y book value" + mode: "nullable" + - name: "othrassetseoy" + type: "integer" + description: "Other assets – e-o-y book value" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets – e-o-y book value" + mode: "nullable" + - name: "mrtgnotespay" + type: "integer" + description: "Mortgage loans payable – e-o-y book value" + mode: "nullable" + - name: "othrliabltseoy" + type: "integer" + description: "Other liabilities – e-o-y book value" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities – e-o-y book value" + mode: "nullable" + - name: "tfundnworth" + type: "integer" + description: "Total fund net worth – e-o-y book value" + mode: "nullable" + - name: "fairmrktvaleoy" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "totexcapgnls" + type: "integer" + description: "Capital gain net income" + mode: "nullable" + - name: "totexcapgn" + type: "integer" + description: "Net gain – sales of assets" + mode: "nullable" + - name: "totexcapls" + type: "integer" + description: "Net loss – sales of assets" + mode: "nullable" + - name: "invstexcisetx" + type: "integer" + description: 
"Section 4940 – no tax" + mode: "nullable" + - name: "sec4940redtxcd" + type: "string" + description: "Section 4940 – 1 % tax" + mode: "nullable" + - name: "sect511tx" + type: "integer" + description: "Section 511 tax" + mode: "nullable" + - name: "subtitleatx" + type: "integer" + description: "Subtitle A tax" + mode: "nullable" + - name: "totaxpyr" + type: "integer" + description: "Total excise tax" + mode: "nullable" + - name: "esttaxcr" + type: "integer" + description: "Estimated tax credit" + mode: "nullable" + - name: "txwithldsrc" + type: "integer" + description: "Tax withheld at source" + mode: "nullable" + - name: "txpaidf2758" + type: "integer" + description: "Tax paid with Form 2758 (filing extension)" + mode: "nullable" + - name: "erronbkupwthld" + type: "integer" + description: "Erroneous backup withholding credit amount" + mode: "nullable" + - name: "estpnlty" + type: "integer" + description: "Estimated tax penalty" + mode: "nullable" + - name: "taxdue" + type: "integer" + description: "Tax due" + mode: "nullable" + - name: "overpay" + type: "integer" + description: "Overpayment" + mode: "nullable" + - name: "crelamt" + type: "integer" + description: "Credit elect amount" + mode: "nullable" + - name: "infleg" + type: "string" + description: "Influence legislation?" + mode: "nullable" + - name: "actnotpr" + type: "string" + description: "Activities not previously reported?" + mode: "nullable" + - name: "chgnprvrptcd" + type: "string" + description: "Changes not previously reported?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Filed 990-T?" + mode: "nullable" + - name: "contractncd" + type: "string" + description: "Contraction?" + mode: "nullable" + - name: "furnishcpycd" + type: "string" + description: "Furnished copy to Attorney General?" + mode: "nullable" + - name: "claimstatcd" + type: "string" + description: "Claiming status?" 
+ mode: "nullable" + - name: "cntrbtrstxyrcd" + type: "string" + description: "Substantial contributors?" + mode: "nullable" + - name: "acqdrindrintcd" + type: "string" + description: "Distribution to donor advised fund with advisory privileges?" + mode: "nullable" + - name: "orgcmplypubcd" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "filedlf1041ind" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "propexchcd" + type: "string" + description: "Property exchange?" + mode: "nullable" + - name: "brwlndmnycd" + type: "string" + description: "Borrow lend money?" + mode: "nullable" + - name: "furngoodscd" + type: "string" + description: "Furnished goods?" + mode: "nullable" + - name: "paidcmpncd" + type: "string" + description: "Paid compensation?" + mode: "nullable" + - name: "transfercd" + type: "string" + description: "Transfer?" + mode: "nullable" + - name: "agremkpaycd" + type: "string" + description: "Agree to make pay?" + mode: "nullable" + - name: "exceptactsind" + type: "string" + description: "Acts fail to qualify under section 53.4941(d)-3?" + mode: "nullable" + - name: "prioractvcd" + type: "string" + description: "Engage in acts in prior year?" + mode: "nullable" + - name: "undistrinccd" + type: "string" + description: "Undistributed income?" + mode: "nullable" + - name: "applyprovind" + type: "string" + description: "Not applying section 4942(a)(2) provisions?" + mode: "nullable" + - name: "dirindirintcd" + type: "string" + description: "Direct indirect interest?" + mode: "nullable" + - name: "excesshldcd" + type: "string" + description: "Excess business holdings?" + mode: "nullable" + - name: "invstjexmptcd" + type: "string" + description: "Jeopardizing investments?" + mode: "nullable" + - name: "prevjexmptcd" + type: "string" + description: "Prior year jeopardizing investments?" 
+ mode: "nullable" + - name: "propgndacd" + type: "string" + description: "Propaganda?" + mode: "nullable" + - name: "ipubelectcd" + type: "string" + description: "Influence public election?" + mode: "nullable" + - name: "grntindivcd" + type: "string" + description: "Grant individual?" + mode: "nullable" + - name: "nchrtygrntcd" + type: "string" + description: "Non-charity grant?" + mode: "nullable" + - name: "nreligiouscd" + type: "string" + description: "Non-religious?" + mode: "nullable" + - name: "excptransind" + type: "string" + description: "Transactions fail to qualify under section 53.4945?" + mode: "nullable" + - name: "rfprsnlbnftind" + type: "string" + description: "Receive funds to pay premiums on personal benefit contract?" + mode: "nullable" + - name: "pyprsnlbnftind" + type: "string" + description: "Pay premiums on personal benefit contract?" + mode: "nullable" + - name: "tfairmrktunuse" + type: "integer" + description: "Fair market value of assets not used for charitable purposes" + mode: "nullable" + - name: "valncharitassets" + type: "integer" + description: "Net value of noncharitable-use assets" + mode: "nullable" + - name: "cmpmininvstret" + type: "integer" + description: "Minimum investment return" + mode: "nullable" + - name: "distribamt" + type: "integer" + description: "Distributable amount" + mode: "nullable" + - name: "undistribincyr" + type: "integer" + description: "Undistributed income" + mode: "nullable" + - name: "adjnetinccola" + type: "integer" + description: "Adjusted net income column a" + mode: "nullable" + - name: "adjnetinccolb" + type: "integer" + description: "Adjusted net income column b" + mode: "nullable" + - name: "adjnetinccolc" + type: "integer" + description: "Adjusted net income column c" + mode: "nullable" + - name: "adjnetinccold" + type: "integer" + description: "Adjusted net income column d" + mode: "nullable" + - name: "adjnetinctot" + type: "integer" + description: "Adjusted net income total" + mode: "nullable" 
+ - name: "qlfydistriba" + type: "integer" + description: "Qualifying distributions column a" + mode: "nullable" + - name: "qlfydistribb" + type: "integer" + description: "Qualifying distributions column b" + mode: "nullable" + - name: "qlfydistribc" + type: "integer" + description: "Qualifying distributions column c" + mode: "nullable" + - name: "qlfydistribd" + type: "integer" + description: "Qualifying distributions column d" + mode: "nullable" + - name: "qlfydistribtot" + type: "integer" + description: "Qualifying distributions total" + mode: "nullable" + - name: "valassetscola" + type: "integer" + description: "Value assets column a" + mode: "nullable" + - name: "valassetscolb" + type: "integer" + description: "Value assets column b" + mode: "nullable" + - name: "valassetscolc" + type: "integer" + description: "Value assets column c" + mode: "nullable" + - name: "valassetscold" + type: "integer" + description: "Value assets column d" + mode: "nullable" + - name: "valassetstot" + type: "integer" + description: "Value assets total" + mode: "nullable" + - name: "qlfyasseta" + type: "integer" + description: "Qualifying assets column a" + mode: "nullable" + - name: "qlfyassetb" + type: "integer" + description: "Qualifying assets column b" + mode: "nullable" + - name: "qlfyassetc" + type: "integer" + description: "Qualifying assets column c" + mode: "nullable" + - name: "qlfyassetd" + type: "integer" + description: "Qualifying assets column d" + mode: "nullable" + - name: "qlfyassettot" + type: "integer" + description: "Qualifying assets total" + mode: "nullable" + - name: "endwmntscola" + type: "integer" + description: "Endowments column a" + mode: "nullable" + - name: "endwmntscolb" + type: "integer" + description: "Endowments column b" + mode: "nullable" + - name: "endwmntscolc" + type: "integer" + description: "Endowments column c" + mode: "nullable" + - name: "endwmntscold" + type: "integer" + description: "Endowments column d" + mode: "nullable" + - name: 
"endwmntstot" + type: "integer" + description: "Endowments total" + mode: "nullable" + - name: "totsuprtcola" + type: "integer" + description: "Total support column a" + mode: "nullable" + - name: "totsuprtcolb" + type: "integer" + description: "Total support column b" + mode: "nullable" + - name: "totsuprtcolc" + type: "integer" + description: "Total support column c" + mode: "nullable" + - name: "totsuprtcold" + type: "integer" + description: "Total support column d" + mode: "nullable" + - name: "totsuprttot" + type: "integer" + description: "Total support total" + mode: "nullable" + - name: "pubsuprtcola" + type: "integer" + description: "Public support column a" + mode: "nullable" + - name: "pubsuprtcolb" + type: "integer" + description: "Public support column b" + mode: "nullable" + - name: "pubsuprtcolc" + type: "integer" + description: "Public support column c" + mode: "nullable" + - name: "pubsuprtcold" + type: "integer" + description: "Public support column d" + mode: "nullable" + - name: "pubsuprttot" + type: "integer" + description: "Public support total" + mode: "nullable" + - name: "grsinvstinca" + type: "integer" + description: "Gross investment income column a" + mode: "nullable" + - name: "grsinvstincb" + type: "integer" + description: "Gross investment income column b" + mode: "nullable" + - name: "grsinvstincc" + type: "integer" + description: "Gross investment income column c" + mode: "nullable" + - name: "grsinvstincd" + type: "integer" + description: "Gross investment income column d" + mode: "nullable" + - name: "grsinvstinctot" + type: "integer" + description: "Gross investment income total" + mode: "nullable" + - name: "grntapprvfut" + type: "integer" + description: "Grants approved for future payment" + mode: "nullable" + - name: "progsrvcacold" + type: "integer" + description: "Program service revenue line 1a (excluded)" + mode: "nullable" + - name: "progsrvcacole" + type: "integer" + description: "Program service revenue line 1a (exempt)" 
+ mode: "nullable" + - name: "progsrvcbcold" + type: "integer" + description: "Program service revenue line 1b (excluded)" + mode: "nullable" + - name: "progsrvcbcole" + type: "integer" + description: "Program service revenue line 1b (exempt)" + mode: "nullable" + - name: "progsrvcccold" + type: "integer" + description: "Program service revenue line 1c (excluded)" + mode: "nullable" + - name: "progsrvcccole" + type: "integer" + description: "Program service revenue line 1c (exempt)" + mode: "nullable" + - name: "progsrvcdcold" + type: "integer" + description: "Program service revenue line 1d (excluded)" + mode: "nullable" + - name: "progsrvcdcole" + type: "integer" + description: "Program service revenue line 1d (exempt)" + mode: "nullable" + - name: "progsrvcecold" + type: "integer" + description: "Program service revenue line 1e (excluded)" + mode: "nullable" + - name: "progsrvcecole" + type: "integer" + description: "Program service revenue line 1e (exempt)" + mode: "nullable" + - name: "progsrvcfcold" + type: "integer" + description: "Program service revenue line 1f (excluded)" + mode: "nullable" + - name: "progsrvcfcole" + type: "integer" + description: "Program service revenue line 1f (exempt)" + mode: "nullable" + - name: "progsrvcgcold" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (excluded)" + mode: "nullable" + - name: "progsrvcgcole" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (exempt)" + mode: "nullable" + - name: "membershpduesd" + type: "integer" + description: "Membership dues and assessments (excluded)" + mode: "nullable" + - name: "membershpduese" + type: "integer" + description: "Membership dues and assessments (exempt)" + mode: "nullable" + - name: "intonsvngsd" + type: "integer" + description: "Interest on savings and temporary cash investments (excluded)" + mode: "nullable" + - name: "intonsvngse" + type: "integer" + description: 
"Interest on savings and temporary cash investments (exempt)" + mode: "nullable" + - name: "dvdndsintd" + type: "integer" + description: "Dividends and interest from securities (excluded)" + mode: "nullable" + - name: "dvdndsinte" + type: "integer" + description: "Dividends and interest from securities (exempt)" + mode: "nullable" + - name: "trnsfrcashcd" + type: "string" + description: "Transfer cash to noncharitable exempt organization?" + mode: "nullable" + - name: "trnsothasstscd" + type: "string" + description: "Transfer other assets to noncharitable exempt organization?" + mode: "nullable" + - name: "salesasstscd" + type: "string" + description: "Sale of assets to noncharitable exempt organization?" + mode: "nullable" + - name: "prchsasstscd" + type: "string" + description: "Purchase of assets from noncharitable exempt organization?" + mode: "nullable" + - name: "rentlsfacltscd" + type: "string" + description: "Rental of facilities or other assets?" + mode: "nullable" + - name: "reimbrsmntscd" + type: "string" + description: "Reimbursements arrangements?" + mode: "nullable" + - name: "loansguarcd" + type: "string" + description: "Loans or other guarantees?" + mode: "nullable" + - name: "perfservicescd" + type: "string" + description: "Performance of services or membership or fundraising solicitations?" + mode: "nullable" + - name: "sharngasstscd" + type: "string" + description: "Sharing of facilities equipment mailing lists other assets or paid employees?" 
+ mode: "nullable" graph_paths: - "irs_990_pf_2014_transform_csv >> load_irs_990_pf_2014_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py b/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py index b535058a8..1606116c3 100644 --- a/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py +++ b/datasets/irs_990/irs_990_pf_2015/irs_990_pf_2015_dag.py @@ -39,7 +39,7 @@ name="irs_990_pf_2015", namespace="default", image_pull_policy="Always", - image="{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", env_vars={ "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/15eofinextract990pf.dat", "SOURCE_FILE": "files/data.dat", diff --git a/datasets/irs_990/irs_990_pf_2015/pipeline.yaml b/datasets/irs_990/irs_990_pf_2015/pipeline.yaml index 587af6929..82e40120b 100644 --- a/datasets/irs_990/irs_990_pf_2015/pipeline.yaml +++ b/datasets/irs_990/irs_990_pf_2015/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_pf_2015 # Description of the table - description: "irs_990_pf_2015 dataset" + description: "IRS 990 PF 2015 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -58,7 +58,7 @@ dag: image_pull_policy: "Always" # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. - image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}" + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. env_vars: @@ -105,728 +105,723 @@ dag: # i.e. 
specify modes for all columns. schema_fields: - - name : "ein" - type : "string" - description : "Employer Identification Number" - mode : "required" - - name : "elf" - type : "string" - description : "E-file indicator" - mode : "nullable" - - name : "tax_prd" - type : "string" - description : "Tax period (YYYYMM format)" - mode : "nullable" - - name : "eostatus" - type : "string" - description : "EO Status Code" - mode : "nullable" - - name : "tax_yr" - type : "integer" - description : "SOI Year" - mode : "nullable" - - name : "operatingcd" - type : "string" - description : "Operating foundation code" - mode : "nullable" - - name : "subcd" - type : "string" - description : "Subsection code" - mode : "nullable" - - name : "fairmrktvalamt" - type : "integer" - description : "Total assets – e-o-y fair market valu" - mode : "nullable" - - name : "grscontrgifts" - type : "integer" - description : "Contributions received" - mode : "nullable" - - name : "schedbind" - type : "string" - description : "Schedule B indicator" - mode : "nullable" - - name : "intrstrvnue" - type : "integer" - description : "Interest revenue" - mode : "nullable" - - name : "dividndsamt" - type : "integer" - description : "" - mode : "nullable" - - name : "grsrents" - type : "integer" - description : "Gross rents" - mode : "nullable" - - name : "grsslspramt" - type : "integer" - description : "Gross sales price for assets" - mode : "nullable" - - name : "costsold" - type : "integer" - description : "Cost-of-goods-sold" - mode : "nullable" - - name : "grsprofitbus" - type : "integer" - description : "Gross profit" - mode : "nullable" - - name : "otherincamt" - type : "integer" - description : "Other income" - mode : "nullable" - - name : "totrcptperbks" - type : "integer" - description : "Total revenue" - mode : "nullable" - - name : "compofficers" - type : "integer" - description : "Compensation of officers" - mode : "nullable" - - name : "pensplemplbenf" - type : "integer" - description : 
"Pension plans employee benefits" - mode : "nullable" - - name : "legalfeesamt" - type : "integer" - description : "Legal fees" - mode : "nullable" - - name : "accountingfees" - type : "integer" - description : "Accounting fees" - mode : "nullable" - - name : "interestamt" - type : "integer" - description : "Interest" - mode : "nullable" - - name : "depreciationamt" - type : "integer" - description : "Depreciation and depletion" - mode : "nullable" - - name : "occupancyamt" - type : "integer" - description : "Occupancy" - mode : "nullable" - - name : "travlconfmtngs" - type : "integer" - description : "Travel conferences and meetings" - mode : "nullable" - - name : "printingpubl" - type : "integer" - description : "Printing and publications" - mode : "nullable" - - name : "topradmnexpnsa" - type : "integer" - description : "Total operating and administrative expenses column a" - mode : "nullable" - - name : "contrpdpbks" - type : "integer" - description : "Contributions gifts grants paid" - mode : "nullable" - - name : "totexpnspbks" - type : "integer" - description : "Total expenses" - mode : "nullable" - - name : "excessrcpts" - type : "integer" - description : "Net income less deficit" - mode : "nullable" - - name : "totrcptnetinc" - type : "integer" - description : "Total receipts net investment income" - mode : "nullable" - - name : "topradmnexpnsb" - type : "integer" - description : "Total operating and administrative expenses column b" - mode : "nullable" - - name : "totexpnsnetinc" - type : "integer" - description : "Total expenses net investment income" - mode : "nullable" - - name : "netinvstinc" - type : "integer" - description : "Net investment income" - mode : "nullable" - - name : "trcptadjnetinc" - type : "integer" - description : "Total receipts adjusted net income" - mode : "nullable" - - name : "totexpnsadjnet" - type : "integer" - description : "Total expenses adjusted net income" - mode : "nullable" - - name : "adjnetinc" - type : "integer" - 
description : "Adjusted net income" - mode : "nullable" - - name : "topradmnexpnsd" - type : "integer" - description : "Total operating and administrative expenses column d" - mode : "nullable" - - name : "totexpnsexempt" - type : "integer" - description : "Total expenses – exempt purpose" - mode : "nullable" - - name : "othrcashamt" - type : "integer" - description : "Cash non-interest-bearing – e-o-y book value" - mode : "nullable" - - name : "invstgovtoblig" - type : "integer" - description : "Investments in U.S. & state government obligations – e-o-y book value" - mode : "nullable" - - name : "invstcorpstk" - type : "integer" - description : "Investments in corporate stock – e-o-y book value" - mode : "nullable" - - name : "invstcorpbnd" - type : "integer" - description : "Investments in corporate bonds– e-o-y book value" - mode : "nullable" - - name : "totinvstsec" - type : "integer" - description : "Total investments in securities – e-o-y book value" - mode : "nullable" - - name : "mrtgloans" - type : "integer" - description : "Investments mortgage loans – e-o-y book value" - mode : "nullable" - - name : "othrinvstend" - type : "integer" - description : "Other investments – e-o-y book value" - mode : "nullable" - - name : "othrassetseoy" - type : "integer" - description : "Other assets – e-o-y book value" - mode : "nullable" - - name : "totassetsend" - type : "integer" - description : "Total assets – e-o-y book value" - mode : "nullable" - - name : "mrtgnotespay" - type : "integer" - description : "Mortgage loans payable – e-o-y book value" - mode : "nullable" - - name : "othrliabltseoy" - type : "integer" - description : "Other liabilities – e-o-y book value" - mode : "nullable" - - name : "totliabend" - type : "integer" - description : "Total liabilities – e-o-y book value" - mode : "nullable" - - name : "tfundnworth" - type : "integer" - description : "Total fund net worth – e-o-y book value" - mode : "nullable" - - name : "fairmrktvaleoy" - type : 
"integer" - description : "Total assets – e-o-y fair market value" - mode : "nullable" - - name : "totexcapgnls" - type : "integer" - description : "Capital gain net income" - mode : "nullable" - - name : "totexcapgn" - type : "integer" - description : "Net gain – sales of assets" - mode : "nullable" - - name : "totexcapls" - type : "integer" - description : "Net loss – sales of assets" - mode : "nullable" - - name : "invstexcisetx" - type : "integer" - description : "Excise tax on net investment income" - mode : "nullable" - - name : "sec4940notxcd" - type : "string" - description : "Section 4940 – no tax" - mode : "nullable" - - name : "sec4940redtxcd" - type : "string" - description : "Section 4940 – 1 % tax" - mode : "nullable" - - name : "sect511tx" - type : "integer" - description : "Section 511 tax" - mode : "nullable" - - name : "subtitleatx" - type : "integer" - description : "Subtitle A tax" - mode : "nullable" - - name : "totaxpyr" - type : "integer" - description : "Total excise tax" - mode : "nullable" - - name : "esttaxcr" - type : "integer" - description : "Estimated tax credit" - mode : "nullable" - - name : "txwithldsrc" - type : "integer" - description : "Tax withheld at source" - mode : "nullable" - - name : "txpaidf2758" - type : "integer" - description : "Tax paid with Form 2758 (filing extension)" - mode : "nullable" - - name : "erronbkupwthld" - type : "integer" - description : "Erroneous backup withholding credit amount" - mode : "nullable" - - name : "estpnlty" - type : "integer" - description : "Estimated tax penalty" - mode : "nullable" - - name : "taxdue" - type : "integer" - description : "Tax due" - mode : "nullable" - - name : "overpay" - type : "integer" - description : "Overpayment" - mode : "nullable" - - name : "crelamt" - type : "integer" - description : "Credit elect amount" - mode : "nullable" - - name : "infleg" - type : "string" - description : "Influence legislation?" 
- mode : "nullable" - - name : "actnotpr" - type : "string" - description : "Activities not previously reported?" - mode : "nullable" - - name : "chgnprvrptcd" - type : "string" - description : "Changes not previously reported?" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - description : "Filed 990-T?" - mode : "nullable" - - name : "contractncd" - type : "string" - description : "Contraction?" - mode : "nullable" - - name : "furnishcpycd" - type : "string" - description : "Furnished copy to Attorney General?" - mode : "nullable" - - name : "claimstatcd" - type : "string" - description : "Claiming status?" - mode : "nullable" - - name : "cntrbtrstxyrcd" - type : "string" - description : "Substantial contributors?" - mode : "nullable" - - name : "distribdafcd" - type : "string" - description : "Distribution to donor advised fund with advisory privileges?" - mode : "nullable" - - name : "orgcmplypubcd" - type : "string" - description : "Comply with public inspection?" - mode : "nullable" - - name : "filedlf1041ind" - type : "string" - description : "Comply with public inspection?" - mode : "nullable" - - name : "propexchcd" - type : "string" - description : "Property exchange?" - mode : "nullable" - - name : "brwlndmnycd" - type : "string" - description : "Borrow lend money?" - mode : "nullable" - - name : "furngoodscd" - type : "string" - description : "Furnished goods?" - mode : "nullable" - - name : "paidcmpncd" - type : "string" - description : "Paid compensation?" - mode : "nullable" - - name : "transfercd" - type : "string" - description : "Transfer?" - mode : "nullable" - - name : "agremkpaycd" - type : "string" - description : "Agree to make pay?" - mode : "nullable" - - name : "exceptactsind" - type : "string" - description : "Acts fail to qualify under section 53.4941(d)-3?" - mode : "nullable" - - name : "prioractvcd" - type : "string" - description : "Engage in acts in prior year?" 
- mode : "nullable" - - name : "undistrinccd" - type : "string" - description : "Undistributed income?" - mode : "nullable" - - name : "applyprovind" - type : "string" - description : "Not applying section 4942(a)(2) provisions?" - mode : "nullable" - - name : "dirindirintcd" - type : "string" - description : "Direct indirect interest?" - mode : "nullable" - - name : "excesshldcd" - type : "string" - description : "Excess business holdings?" - mode : "nullable" - - name : "invstjexmptcd" - type : "string" - description : "Jeopardizing investments?" - mode : "nullable" - - name : "prevjexmptcd" - type : "string" - description : "Prior year jeopardizing investments?" - mode : "nullable" - - name : "propgndacd" - type : "string" - description : "Propaganda?" - mode : "nullable" - - name : "ipubelectcd" - type : "string" - description : "Influence public election?" - mode : "nullable" - - name : "grntindivcd" - type : "string" - description : "Grant individual?" - mode : "nullable" - - name : "nchrtygrntcd" - type : "string" - description : "Non-charity grant?" - mode : "nullable" - - name : "nreligiouscd" - type : "string" - description : "Non-religious?" - mode : "nullable" - - name : "excptransind" - type : "string" - description : "Transactions fail to qualify under section 53.4945?" - mode : "nullable" - - name : "rfprsnlbnftind" - type : "string" - description : "Receive funds to pay premiums on personal benefit contract?" - mode : "nullable" - - name : "pyprsnlbnftind" - type : "string" - description : "Pay premiums on personal benefit contract?" 
- mode : "nullable" - - name : "tfairmrktunuse" - type : "integer" - description : "Fair market value of assets not used for charitable purposes" - mode : "nullable" - - name : "valncharitassets" - type : "integer" - description : "Net value of noncharitable-use assets" - mode : "nullable" - - name : "cmpmininvstret" - type : "integer" - description : "Minimum investment return" - mode : "nullable" - - name : "distribamt" - type : "integer" - description : "Distributable amount" - mode : "nullable" - - name : "undistribincyr" - type : "integer" - description : "Undistributed income" - mode : "nullable" - - name : "adjnetinccola" - type : "integer" - description : "Adjusted net income column a" - mode : "nullable" - - name : "adjnetinccolb" - type : "integer" - description : "Adjusted net income column b" - mode : "nullable" - - name : "adjnetinccolc" - type : "integer" - description : "Adjusted net income column c" - mode : "nullable" - - name : "adjnetinccold" - type : "integer" - description : "Adjusted net income column d" - mode : "nullable" - - name : "adjnetinctot" - type : "integer" - description : "Adjusted net income total" - mode : "nullable" - - name : "qlfydistriba" - type : "integer" - description : "Qualifying distributions column a" - mode : "nullable" - - name : "qlfydistribb" - type : "integer" - description : "Qualifying distributions column b" - mode : "nullable" - - name : "qlfydistribc" - type : "integer" - description : "Qualifying distributions column c" - mode : "nullable" - - name : "qlfydistribd" - type : "integer" - description : "Qualifying distributions column d" - mode : "nullable" - - name : "qlfydistribtot" - type : "integer" - description : "Qualifying distributions total" - mode : "nullable" - - name : "valassetscola" - type : "integer" - description : "Value assets column a" - mode : "nullable" - - name : "valassetscolb" - type : "integer" - description : "Value assets column b" - mode : "nullable" - - name : "valassetscolc" - 
type : "integer" - description : "Value assets column c" - mode : "nullable" - - name : "valassetscold" - type : "integer" - description : "Value assets column d" - mode : "nullable" - - name : "valassetstot" - type : "integer" - description : "Value assets total" - mode : "nullable" - - name : "qlfyasseta" - type : "integer" - description : "Qualifying assets column a" - mode : "nullable" - - name : "qlfyassetb" - type : "integer" - description : "Qualifying assets column b" - mode : "nullable" - - name : "qlfyassetc" - type : "integer" - description : "Qualifying assets column c" - mode : "nullable" - - name : "qlfyassetd" - type : "integer" - description : "Qualifying assets column d" - mode : "nullable" - - name : "qlfyassettot" - type : "integer" - description : "Qualifying assets total" - mode : "nullable" - - name : "endwmntscola" - type : "integer" - description : "Endowments column a" - mode : "nullable" - - name : "endwmntscolb" - type : "integer" - description : "Endowments column b" - mode : "nullable" - - name : "endwmntscolc" - type : "integer" - description : "Endowments column c" - mode : "nullable" - - name : "endwmntscold" - type : "integer" - description : "Endowments column d" - mode : "nullable" - - name : "endwmntstot" - type : "integer" - description : "Endowments total" - mode : "nullable" - - name : "totsuprtcola" - type : "integer" - description : "Total support column a" - mode : "nullable" - - name : "totsuprtcolb" - type : "integer" - description : "Total support column b" - mode : "nullable" - - name : "totsuprtcolc" - type : "integer" - description : "Total support column c" - mode : "nullable" - - name : "totsuprtcold" - type : "integer" - description : "Total support column d" - mode : "nullable" - - name : "totsuprttot" - type : "integer" - description : "Total support total" - mode : "nullable" - - name : "pubsuprtcola" - type : "integer" - description : "Public support column a" - mode : "nullable" - - name : "pubsuprtcolb" - 
type : "integer" - description : "Public support column b" - mode : "nullable" - - name : "pubsuprtcolc" - type : "integer" - description : "Public support column c" - mode : "nullable" - - name : "pubsuprtcold" - type : "integer" - description : "Public support column d" - mode : "nullable" - - name : "pubsuprttot" - type : "integer" - description : "Public support total" - mode : "nullable" - - name : "grsinvstinca" - type : "integer" - description : "Gross investment income column a" - mode : "nullable" - - name : "grsinvstincb" - type : "integer" - description : "Gross investment income column b" - mode : "nullable" - - name : "grsinvstincc" - type : "integer" - description : "Gross investment income column c" - mode : "nullable" - - name : "grsinvstincd" - type : "integer" - description : "Gross investment income column d" - mode : "nullable" - - name : "grsinvstinctot" - type : "integer" - description : "Gross investment income total" - mode : "nullable" - - name : "grntapprvfut" - type : "integer" - description : "Grants approved for future payment" - mode : "nullable" - - name : "progsrvcacold" - type : "integer" - description : "Program service revenue line 1a (excluded)" - mode : "nullable" - - name : "progsrvcacole" - type : "integer" - description : "Program service revenue line 1a (exempt)" - mode : "nullable" - - name : "progsrvcbcold" - type : "integer" - description : "Program service revenue line 1b (excluded)" - mode : "nullable" - - name : "progsrvcbcole" - type : "integer" - description : "Program service revenue line 1b (exempt)" - mode : "nullable" - - name : "progsrvcccold" - type : "integer" - description : "Program service revenue line 1c (excluded)" - mode : "nullable" - - name : "progsrvcccole" - type : "integer" - description : "Program service revenue line 1c (exempt)" - mode : "nullable" - - name : "progsrvcdcold" - type : "integer" - description : "Program service revenue line 1d (excluded)" - mode : "nullable" - - name : 
"progsrvcdcole" - type : "integer" - description : "Program service revenue line 1d (exempt)" - mode : "nullable" - - name : "progsrvcecold" - type : "integer" - description : "Program service revenue line 1e (excluded)" - mode : "nullable" - - name : "progsrvcecole" - type : "integer" - description : "Program service revenue line 1e (exempt)" - mode : "nullable" - - name : "progsrvcfcold" - type : "integer" - description : "Program service revenue line 1f (excluded)" - mode : "nullable" - - name : "progsrvcfcole" - type : "integer" - description : "Program service revenue line 1f (exempt)" - mode : "nullable" - - name : "progsrvcgcold" - type : "integer" - description : "Program service revenue--fees and contracts from government line 1g (excluded)" - mode : "nullable" - - name : "progsrvcgcole" - type : "integer" - description : "Program service revenue--fees and contracts from government line 1g (exempt)" - mode : "nullable" - - name : "membershpduesd" - type : "integer" - description : "Membership dues and assessments (excluded)" - mode : "nullable" - - name : "membershpduese" - type : "integer" - description : "Membership dues and assessments (exempt)" - mode : "nullable" - - name : "intonsvngsd" - type : "integer" - description : "Interest on savings and temporary cash investments (excluded)" - mode : "nullable" - - name : "intonsvngse" - type : "integer" - description : "Interest on savings and temporary cash investments (exempt)" - mode : "nullable" - - name : "dvdndsintd" - type : "integer" - description : "Dividends and interest from securities (excluded)" - mode : "nullable" - - name : "dvdndsinte" - type : "integer" - description : "Dividends and interest from securities (exempt)" - mode : "nullable" - - name : "trnsfrcashcd" - type : "string" - description : "Transfer cash to noncharitable exempt organization?" - mode : "nullable" - - name : "trnsothasstscd" - type : "string" - description : "Transfer other assets to noncharitable exempt organization?" 
- mode : "nullable" - - name : "salesasstscd" - type : "string" - description : "Sale of assets to noncharitable exempt organization?" - mode : "nullable" - - name : "prchsasstscd" - type : "string" - description : "Purchase of assets from noncharitable exempt organization?" - mode : "nullable" - - name : "rentlsfacltscd" - type : "string" - description : "Rental of facilities or other assets?" - mode : "nullable" - - name : "reimbrsmntscd" - type : "string" - description : "Reimbursements arrangements?" - mode : "nullable" - - name : "loansguarcd" - type : "string" - description : "Loans or other guarantees?" - mode : "nullable" - - name : "perfservicescd" - type : "string" - description : "Performance of services or membership or fundraising solicitations?" - mode : "nullable" - - name : "sharngasstscd" - type : "string" - description : "Sharing of facilities equipment mailing lists other assets or paid employees?" - mode : "nullable" + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_prd" + type: "string" + description: "Tax period (YYYYMM format)" + mode: "nullable" + - name: "eostatus" + type: "string" + description: "EO Status Code" + mode: "nullable" + - name: "tax_yr" + type: "integer" + description: "SOI Year" + mode: "nullable" + - name: "operatingcd" + type: "string" + description: "Operating foundation code" + mode: "nullable" + - name: "subcd" + type: "string" + description: "Subsection code" + mode: "nullable" + - name: "fairmrktvalamt" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "grscontrgifts" + type: "integer" + description: "Contributions received" + mode: "nullable" + - name: "schedbind" + type: "string" + description: "Schedule B indicator" + mode: "nullable" + - name: "intrstrvnue" + type: "integer" + description: "Interest revenue" + mode:
"nullable" + - name: "dividndsamt" + type: "integer" + description: "" + mode: "nullable" + - name: "grsrents" + type: "integer" + description: "Gross rents" + mode: "nullable" + - name: "grsslspramt" + type: "integer" + description: "Gross sales price for assets" + mode: "nullable" + - name: "costsold" + type: "integer" + description: "Cost-of-goods-sold" + mode: "nullable" + - name: "grsprofitbus" + type: "integer" + description: "Gross profit" + mode: "nullable" + - name: "otherincamt" + type: "integer" + description: "Other income" + mode: "nullable" + - name: "totrcptperbks" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "compofficers" + type: "integer" + description: "Compensation of officers" + mode: "nullable" + - name: "pensplemplbenf" + type: "integer" + description: "Pension plans employee benefits" + mode: "nullable" + - name: "legalfeesamt" + type: "integer" + description: "Legal fees" + mode: "nullable" + - name: "accountingfees" + type: "integer" + description: "Accounting fees" + mode: "nullable" + - name: "interestamt" + type: "integer" + description: "Interest" + mode: "nullable" + - name: "depreciationamt" + type: "integer" + description: "Depreciation and depletion" + mode: "nullable" + - name: "occupancyamt" + type: "integer" + description: "Occupancy" + mode: "nullable" + - name: "travlconfmtngs" + type: "integer" + description: "Travel conferences and meetings" + mode: "nullable" + - name: "printingpubl" + type: "integer" + description: "Printing and publications" + mode: "nullable" + - name: "topradmnexpnsa" + type: "integer" + description: "Total operating and administrative expenses column a" + mode: "nullable" + - name: "contrpdpbks" + type: "integer" + description: "Contributions gifts grants paid" + mode: "nullable" + - name: "totexpnspbks" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "excessrcpts" + type: "integer" + description: "Net income less deficit" + mode: 
"nullable" + - name: "totrcptnetinc" + type: "integer" + description: "Total receipts net investment income" + mode: "nullable" + - name: "topradmnexpnsb" + type: "integer" + description: "Total operating and administrative expenses column b" + mode: "nullable" + - name: "totexpnsnetinc" + type: "integer" + description: "Total expenses net investment income" + mode: "nullable" + - name: "netinvstinc" + type: "integer" + description: "Net investment income" + mode: "nullable" + - name: "trcptadjnetinc" + type: "integer" + description: "Total receipts adjusted net income" + mode: "nullable" + - name: "totexpnsadjnet" + type: "integer" + description: "Total expenses adjusted net income" + mode: "nullable" + - name: "adjnetinc" + type: "integer" + description: "Adjusted net income" + mode: "nullable" + - name: "topradmnexpnsd" + type: "integer" + description: "Total operating and administrative expenses column d" + mode: "nullable" + - name: "totexpnsexempt" + type: "integer" + description: "Total expenses – exempt purpose" + mode: "nullable" + - name: "othrcashamt" + type: "integer" + description: "Cash non-interest-bearing – e-o-y book value" + mode: "nullable" + - name: "invstgovtoblig" + type: "integer" + description: "Investments in U.S. 
& state government obligations – e-o-y book value" + mode: "nullable" + - name: "invstcorpstk" + type: "integer" + description: "Investments in corporate stock – e-o-y book value" + mode: "nullable" + - name: "invstcorpbnd" + type: "integer" + description: "Investments in corporate bonds – e-o-y book value" + mode: "nullable" + - name: "totinvstsec" + type: "integer" + description: "Total investments in securities – e-o-y book value" + mode: "nullable" + - name: "mrtgloans" + type: "integer" + description: "Investments mortgage loans – e-o-y book value" + mode: "nullable" + - name: "othrinvstend" + type: "integer" + description: "Other investments – e-o-y book value" + mode: "nullable" + - name: "othrassetseoy" + type: "integer" + description: "Other assets – e-o-y book value" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets – e-o-y book value" + mode: "nullable" + - name: "mrtgnotespay" + type: "integer" + description: "Mortgage loans payable – e-o-y book value" + mode: "nullable" + - name: "othrliabltseoy" + type: "integer" + description: "Other liabilities – e-o-y book value" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities – e-o-y book value" + mode: "nullable" + - name: "tfundnworth" + type: "integer" + description: "Total fund net worth – e-o-y book value" + mode: "nullable" + - name: "fairmrktvaleoy" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "totexcapgnls" + type: "integer" + description: "Capital gain net income" + mode: "nullable" + - name: "totexcapgn" + type: "integer" + description: "Net gain – sales of assets" + mode: "nullable" + - name: "totexcapls" + type: "integer" + description: "Net loss – sales of assets" + mode: "nullable" + - name: "invstexcisetx" + type: "integer" + description: "Excise tax on net investment income" + mode: "nullable" + - name: "sec4940notxcd" + type: "string" + description:
"Section 4940 – no tax" + mode: "nullable" + - name: "sec4940redtxcd" + type: "string" + description: "Section 4940 – 1 % tax" + mode: "nullable" + - name: "sect511tx" + type: "integer" + description: "Section 511 tax" + mode: "nullable" + - name: "subtitleatx" + type: "integer" + description: "Subtitle A tax" + mode: "nullable" + - name: "totaxpyr" + type: "integer" + description: "Total excise tax" + mode: "nullable" + - name: "esttaxcr" + type: "integer" + description: "Estimated tax credit" + mode: "nullable" + - name: "txwithldsrc" + type: "integer" + description: "Tax withheld at source" + mode: "nullable" + - name: "txpaidf2758" + type: "integer" + description: "Tax paid with Form 2758 (filing extension)" + mode: "nullable" + - name: "erronbkupwthld" + type: "integer" + description: "Erroneous backup withholding credit amount" + mode: "nullable" + - name: "estpnlty" + type: "integer" + description: "Estimated tax penalty" + mode: "nullable" + - name: "taxdue" + type: "integer" + description: "Tax due" + mode: "nullable" + - name: "overpay" + type: "integer" + description: "Overpayment" + mode: "nullable" + - name: "crelamt" + type: "integer" + description: "Credit elect amount" + mode: "nullable" + - name: "infleg" + type: "string" + description: "Influence legislation?" + mode: "nullable" + - name: "actnotpr" + type: "string" + description: "Activities not previously reported?" + mode: "nullable" + - name: "chgnprvrptcd" + type: "string" + description: "Changes not previously reported?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Filed 990-T?" + mode: "nullable" + - name: "contractncd" + type: "string" + description: "Contraction?" + mode: "nullable" + - name: "furnishcpycd" + type: "string" + description: "Furnished copy to Attorney General?" + mode: "nullable" + - name: "claimstatcd" + type: "string" + description: "Claiming status?" 
+ mode: "nullable" + - name: "cntrbtrstxyrcd" + type: "string" + description: "Substantial contributors?" + mode: "nullable" + - name: "distribdafcd" + type: "string" + description: "Distribution to donor advised fund with advisory privileges?" + mode: "nullable" + - name: "orgcmplypubcd" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "filedlf1041ind" + type: "string" + description: "Filed in lieu of Form 1041?" + mode: "nullable" + - name: "propexchcd" + type: "string" + description: "Property exchange?" + mode: "nullable" + - name: "brwlndmnycd" + type: "string" + description: "Borrow lend money?" + mode: "nullable" + - name: "furngoodscd" + type: "string" + description: "Furnished goods?" + mode: "nullable" + - name: "paidcmpncd" + type: "string" + description: "Paid compensation?" + mode: "nullable" + - name: "transfercd" + type: "string" + description: "Transfer?" + mode: "nullable" + - name: "agremkpaycd" + type: "string" + description: "Agree to make pay?" + mode: "nullable" + - name: "exceptactsind" + type: "string" + description: "Acts fail to qualify under section 53.4941(d)-3?" + mode: "nullable" + - name: "prioractvcd" + type: "string" + description: "Engage in acts in prior year?" + mode: "nullable" + - name: "undistrinccd" + type: "string" + description: "Undistributed income?" + mode: "nullable" + - name: "applyprovind" + type: "string" + description: "Not applying section 4942(a)(2) provisions?" + mode: "nullable" + - name: "dirindirintcd" + type: "string" + description: "Direct indirect interest?" + mode: "nullable" + - name: "excesshldcd" + type: "string" + description: "Excess business holdings?" + mode: "nullable" + - name: "invstjexmptcd" + type: "string" + description: "Jeopardizing investments?" + mode: "nullable" + - name: "prevjexmptcd" + type: "string" + description: "Prior year jeopardizing investments?"
+ mode: "nullable" + - name: "propgndacd" + type: "string" + description: "Propaganda?" + mode: "nullable" + - name: "ipubelectcd" + type: "string" + description: "Influence public election?" + mode: "nullable" + - name: "grntindivcd" + type: "string" + description: "Grant individual?" + mode: "nullable" + - name: "nchrtygrntcd" + type: "string" + description: "Non-charity grant?" + mode: "nullable" + - name: "nreligiouscd" + type: "string" + description: "Non-religious?" + mode: "nullable" + - name: "excptransind" + type: "string" + description: "Transactions fail to qualify under section 53.4945?" + mode: "nullable" + - name: "rfprsnlbnftind" + type: "string" + description: "Receive funds to pay premiums on personal benefit contract?" + mode: "nullable" + - name: "pyprsnlbnftind" + type: "string" + description: "Pay premiums on personal benefit contract?" + mode: "nullable" + - name: "tfairmrktunuse" + type: "integer" + description: "Fair market value of assets not used for charitable purposes" + mode: "nullable" + - name: "valncharitassets" + type: "integer" + description: "Net value of noncharitable-use assets" + mode: "nullable" + - name: "cmpmininvstret" + type: "integer" + description: "Minimum investment return" + mode: "nullable" + - name: "distribamt" + type: "integer" + description: "Distributable amount" + mode: "nullable" + - name: "undistribincyr" + type: "integer" + description: "Undistributed income" + mode: "nullable" + - name: "adjnetinccola" + type: "integer" + description: "Adjusted net income column a" + mode: "nullable" + - name: "adjnetinccolb" + type: "integer" + description: "Adjusted net income column b" + mode: "nullable" + - name: "adjnetinccolc" + type: "integer" + description: "Adjusted net income column c" + mode: "nullable" + - name: "adjnetinccold" + type: "integer" + description: "Adjusted net income column d" + mode: "nullable" + - name: "adjnetinctot" + type: "integer" + description: "Adjusted net income total" + mode: "nullable" 
+ - name: "qlfydistriba" + type: "integer" + description: "Qualifying distributions column a" + mode: "nullable" + - name: "qlfydistribb" + type: "integer" + description: "Qualifying distributions column b" + mode: "nullable" + - name: "qlfydistribc" + type: "integer" + description: "Qualifying distributions column c" + mode: "nullable" + - name: "qlfydistribd" + type: "integer" + description: "Qualifying distributions column d" + mode: "nullable" + - name: "qlfydistribtot" + type: "integer" + description: "Qualifying distributions total" + mode: "nullable" + - name: "valassetscola" + type: "integer" + description: "Value assets column a" + mode: "nullable" + - name: "valassetscolb" + type: "integer" + description: "Value assets column b" + mode: "nullable" + - name: "valassetscolc" + type: "integer" + description: "Value assets column c" + mode: "nullable" + - name: "valassetscold" + type: "integer" + description: "Value assets column d" + mode: "nullable" + - name: "valassetstot" + type: "integer" + description: "Value assets total" + mode: "nullable" + - name: "qlfyasseta" + type: "integer" + description: "Qualifying assets column a" + mode: "nullable" + - name: "qlfyassetb" + type: "integer" + description: "Qualifying assets column b" + mode: "nullable" + - name: "qlfyassetc" + type: "integer" + description: "Qualifying assets column c" + mode: "nullable" + - name: "qlfyassetd" + type: "integer" + description: "Qualifying assets column d" + mode: "nullable" + - name: "qlfyassettot" + type: "integer" + description: "Qualifying assets total" + mode: "nullable" + - name: "endwmntscola" + type: "integer" + description: "Endowments column a" + mode: "nullable" + - name: "endwmntscolb" + type: "integer" + description: "Endowments column b" + mode: "nullable" + - name: "endwmntscolc" + type: "integer" + description: "Endowments column c" + mode: "nullable" + - name: "endwmntscold" + type: "integer" + description: "Endowments column d" + mode: "nullable" + - name: 
"endwmntstot" + type: "integer" + description: "Endowments total" + mode: "nullable" + - name: "totsuprtcola" + type: "integer" + description: "Total support column a" + mode: "nullable" + - name: "totsuprtcolb" + type: "integer" + description: "Total support column b" + mode: "nullable" + - name: "totsuprtcolc" + type: "integer" + description: "Total support column c" + mode: "nullable" + - name: "totsuprtcold" + type: "integer" + description: "Total support column d" + mode: "nullable" + - name: "totsuprttot" + type: "integer" + description: "Total support total" + mode: "nullable" + - name: "pubsuprtcola" + type: "integer" + description: "Public support column a" + mode: "nullable" + - name: "pubsuprtcolb" + type: "integer" + description: "Public support column b" + mode: "nullable" + - name: "pubsuprtcolc" + type: "integer" + description: "Public support column c" + mode: "nullable" + - name: "pubsuprtcold" + type: "integer" + description: "Public support column d" + mode: "nullable" + - name: "pubsuprttot" + type: "integer" + description: "Public support total" + mode: "nullable" + - name: "grsinvstinca" + type: "integer" + description: "Gross investment income column a" + mode: "nullable" + - name: "grsinvstincb" + type: "integer" + description: "Gross investment income column b" + mode: "nullable" + - name: "grsinvstincc" + type: "integer" + description: "Gross investment income column c" + mode: "nullable" + - name: "grsinvstincd" + type: "integer" + description: "Gross investment income column d" + mode: "nullable" + - name: "grsinvstinctot" + type: "integer" + description: "Gross investment income total" + mode: "nullable" + - name: "grntapprvfut" + type: "integer" + description: "Grants approved for future payment" + mode: "nullable" + - name: "progsrvcacold" + type: "integer" + description: "Program service revenue line 1a (excluded)" + mode: "nullable" + - name: "progsrvcacole" + type: "integer" + description: "Program service revenue line 1a (exempt)" 
+ mode: "nullable" + - name: "progsrvcbcold" + type: "integer" + description: "Program service revenue line 1b (excluded)" + mode: "nullable" + - name: "progsrvcbcole" + type: "integer" + description: "Program service revenue line 1b (exempt)" + mode: "nullable" + - name: "progsrvcccold" + type: "integer" + description: "Program service revenue line 1c (excluded)" + mode: "nullable" + - name: "progsrvcccole" + type: "integer" + description: "Program service revenue line 1c (exempt)" + mode: "nullable" + - name: "progsrvcdcold" + type: "integer" + description: "Program service revenue line 1d (excluded)" + mode: "nullable" + - name: "progsrvcdcole" + type: "integer" + description: "Program service revenue line 1d (exempt)" + mode: "nullable" + - name: "progsrvcecold" + type: "integer" + description: "Program service revenue line 1e (excluded)" + mode: "nullable" + - name: "progsrvcecole" + type: "integer" + description: "Program service revenue line 1e (exempt)" + mode: "nullable" + - name: "progsrvcfcold" + type: "integer" + description: "Program service revenue line 1f (excluded)" + mode: "nullable" + - name: "progsrvcfcole" + type: "integer" + description: "Program service revenue line 1f (exempt)" + mode: "nullable" + - name: "progsrvcgcold" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (excluded)" + mode: "nullable" + - name: "progsrvcgcole" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (exempt)" + mode: "nullable" + - name: "membershpduesd" + type: "integer" + description: "Membership dues and assessments (excluded)" + mode: "nullable" + - name: "membershpduese" + type: "integer" + description: "Membership dues and assessments (exempt)" + mode: "nullable" + - name: "intonsvngsd" + type: "integer" + description: "Interest on savings and temporary cash investments (excluded)" + mode: "nullable" + - name: "intonsvngse" + type: "integer" + description: 
"Interest on savings and temporary cash investments (exempt)" + mode: "nullable" + - name: "dvdndsintd" + type: "integer" + description: "Dividends and interest from securities (excluded)" + mode: "nullable" + - name: "dvdndsinte" + type: "integer" + description: "Dividends and interest from securities (exempt)" + mode: "nullable" + - name: "trnsfrcashcd" + type: "string" + description: "Transfer cash to noncharitable exempt organization?" + mode: "nullable" + - name: "trnsothasstscd" + type: "string" + description: "Transfer other assets to noncharitable exempt organization?" + mode: "nullable" + - name: "salesasstscd" + type: "string" + description: "Sale of assets to noncharitable exempt organization?" + mode: "nullable" + - name: "prchsasstscd" + type: "string" + description: "Purchase of assets from noncharitable exempt organization?" + mode: "nullable" + - name: "rentlsfacltscd" + type: "string" + description: "Rental of facilities or other assets?" + mode: "nullable" + - name: "reimbrsmntscd" + type: "string" + description: "Reimbursements arrangements?" + mode: "nullable" + - name: "loansguarcd" + type: "string" + description: "Loans or other guarantees?" + mode: "nullable" + - name: "perfservicescd" + type: "string" + description: "Performance of services or membership or fundraising solicitations?" + mode: "nullable" + - name: "sharngasstscd" + type: "string" + description: "Sharing of facilities equipment mailing lists other assets or paid employees?" 
+ mode: "nullable" graph_paths: - "irs_990_pf_2015_transform_csv >> load_irs_990_pf_2015_to_bq" - - - - - \ No newline at end of file diff --git a/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py index 2bdbec198..932e77562 100644 --- a/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py +++ b/datasets/irs_990/irs_990_pf_2016/irs_990_pf_2016_dag.py @@ -39,7 +39,7 @@ name="irs_990_pf_2016", namespace="default", image_pull_policy="Always", - image="{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}", + image="{{ var.json.irs_990.container_registry.run_csv_transform_kub }}", env_vars={ "SOURCE_URL": "https://www.irs.gov/pub/irs-soi/16eofinextract990pf.dat", "SOURCE_FILE": "files/data.dat", diff --git a/datasets/irs_990/irs_990_pf_2016/pipeline.yaml b/datasets/irs_990/irs_990_pf_2016/pipeline.yaml index 26877d559..014c7f0a5 100644 --- a/datasets/irs_990/irs_990_pf_2016/pipeline.yaml +++ b/datasets/irs_990/irs_990_pf_2016/pipeline.yaml @@ -20,7 +20,7 @@ resources: table_id: irs_990_pf_2016 # Description of the table - description: "irs_990_pf_2016 dataset" + description: "IRS 990 PF 2016 dataset" dag: airflow_version: 1 @@ -33,7 +33,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "@daily" + schedule_interval: "@daily" catchup: False default_view: graph @@ -58,7 +58,7 @@ dag: image_pull_policy: "Always" # Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag. - image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub_pf }}" + image: "{{ var.json.irs_990.container_registry.run_csv_transform_kub }}" # Set the environment variables you need initialized in the container. Use these as input variables for the script your container is expected to perform. env_vars: @@ -106,728 +106,723 @@ dag: # i.e. 
specify modes for all columns. schema_fields: - - name : "ein" - type : "string" - description : "Employer Identification Number" - mode : "required" - - name : "elf" - type : "string" - description : "E-file indicator" - mode : "nullable" - - name : "tax_prd" - type : "string" - description : "Tax period (YYYYMM format)" - mode : "nullable" - - name : "eostatus" - type : "string" - description : "EO Status Code" - mode : "nullable" - - name : "tax_yr" - type : "integer" - description : "SOI Year" - mode : "nullable" - - name : "operatingcd" - type : "string" - description : "Operating foundation code" - mode : "nullable" - - name : "subcd" - type : "string" - description : "Subsection code" - mode : "nullable" - - name : "fairmrktvalamt" - type : "integer" - description : "Total assets – e-o-y fair market valu" - mode : "nullable" - - name : "grscontrgifts" - type : "integer" - description : "Contributions received" - mode : "nullable" - - name : "schedbind" - type : "string" - description : "Schedule B indicator" - mode : "nullable" - - name : "intrstrvnue" - type : "integer" - description : "Interest revenue" - mode : "nullable" - - name : "dividndsamt" - type : "integer" - description : "" - mode : "nullable" - - name : "grsrents" - type : "integer" - description : "Gross rents" - mode : "nullable" - - name : "grsslspramt" - type : "integer" - description : "Gross sales price for assets" - mode : "nullable" - - name : "costsold" - type : "integer" - description : "Cost-of-goods-sold" - mode : "nullable" - - name : "grsprofitbus" - type : "integer" - description : "Gross profit" - mode : "nullable" - - name : "otherincamt" - type : "integer" - description : "Other income" - mode : "nullable" - - name : "totrcptperbks" - type : "integer" - description : "Total revenue" - mode : "nullable" - - name : "compofficers" - type : "integer" - description : "Compensation of officers" - mode : "nullable" - - name : "pensplemplbenf" - type : "integer" - description : 
"Pension plans employee benefits" - mode : "nullable" - - name : "legalfeesamt" - type : "integer" - description : "Legal fees" - mode : "nullable" - - name : "accountingfees" - type : "integer" - description : "Accounting fees" - mode : "nullable" - - name : "interestamt" - type : "integer" - description : "Interest" - mode : "nullable" - - name : "depreciationamt" - type : "integer" - description : "Depreciation and depletion" - mode : "nullable" - - name : "occupancyamt" - type : "integer" - description : "Occupancy" - mode : "nullable" - - name : "travlconfmtngs" - type : "integer" - description : "Travel conferences and meetings" - mode : "nullable" - - name : "printingpubl" - type : "integer" - description : "Printing and publications" - mode : "nullable" - - name : "topradmnexpnsa" - type : "integer" - description : "Total operating and administrative expenses column a" - mode : "nullable" - - name : "contrpdpbks" - type : "integer" - description : "Contributions gifts grants paid" - mode : "nullable" - - name : "totexpnspbks" - type : "integer" - description : "Total expenses" - mode : "nullable" - - name : "excessrcpts" - type : "integer" - description : "Net income less deficit" - mode : "nullable" - - name : "totrcptnetinc" - type : "integer" - description : "Total receipts net investment income" - mode : "nullable" - - name : "topradmnexpnsb" - type : "integer" - description : "Total operating and administrative expenses column b" - mode : "nullable" - - name : "totexpnsnetinc" - type : "integer" - description : "Total expenses net investment income" - mode : "nullable" - - name : "netinvstinc" - type : "integer" - description : "Net investment income" - mode : "nullable" - - name : "trcptadjnetinc" - type : "integer" - description : "Total receipts adjusted net income" - mode : "nullable" - - name : "totexpnsadjnet" - type : "integer" - description : "Total expenses adjusted net income" - mode : "nullable" - - name : "adjnetinc" - type : "integer" - 
description : "Adjusted net income" - mode : "nullable" - - name : "topradmnexpnsd" - type : "integer" - description : "Total operating and administrative expenses column d" - mode : "nullable" - - name : "totexpnsexempt" - type : "integer" - description : "Total expenses – exempt purpose" - mode : "nullable" - - name : "othrcashamt" - type : "integer" - description : "Cash non-interest-bearing – e-o-y book value" - mode : "nullable" - - name : "invstgovtoblig" - type : "integer" - description : "Investments in U.S. & state government obligations – e-o-y book value" - mode : "nullable" - - name : "invstcorpstk" - type : "integer" - description : "Investments in corporate stock – e-o-y book value" - mode : "nullable" - - name : "invstcorpbnd" - type : "integer" - description : "Investments in corporate bonds– e-o-y book value" - mode : "nullable" - - name : "totinvstsec" - type : "integer" - description : "Total investments in securities – e-o-y book value" - mode : "nullable" - - name : "mrtgloans" - type : "integer" - description : "Investments mortgage loans – e-o-y book value" - mode : "nullable" - - name : "othrinvstend" - type : "integer" - description : "Other investments – e-o-y book value" - mode : "nullable" - - name : "othrassetseoy" - type : "integer" - description : "Other assets – e-o-y book value" - mode : "nullable" - - name : "totassetsend" - type : "integer" - description : "Total assets – e-o-y book value" - mode : "nullable" - - name : "mrtgnotespay" - type : "integer" - description : "Mortgage loans payable – e-o-y book value" - mode : "nullable" - - name : "othrliabltseoy" - type : "integer" - description : "Other liabilities – e-o-y book value" - mode : "nullable" - - name : "totliabend" - type : "integer" - description : "Total liabilities – e-o-y book value" - mode : "nullable" - - name : "tfundnworth" - type : "integer" - description : "Total fund net worth – e-o-y book value" - mode : "nullable" - - name : "fairmrktvaleoy" - type : 
"integer" - description : "Total assets – e-o-y fair market value" - mode : "nullable" - - name : "totexcapgnls" - type : "integer" - description : "Capital gain net income" - mode : "nullable" - - name : "totexcapgn" - type : "integer" - description : "Net gain – sales of assets" - mode : "nullable" - - name : "totexcapls" - type : "integer" - description : "Net loss – sales of assets" - mode : "nullable" - - name : "invstexcisetx" - type : "integer" - description : "Excise tax on net investment income" - mode : "nullable" - - name : "sec4940notxcd" - type : "string" - description : "Section 4940 – no tax" - mode : "nullable" - - name : "sec4940redtxcd" - type : "string" - description : "Section 4940 – 1 % tax" - mode : "nullable" - - name : "sect511tx" - type : "integer" - description : "Section 511 tax" - mode : "nullable" - - name : "subtitleatx" - type : "integer" - description : "Subtitle A tax" - mode : "nullable" - - name : "totaxpyr" - type : "integer" - description : "Total excise tax" - mode : "nullable" - - name : "esttaxcr" - type : "integer" - description : "Estimated tax credit" - mode : "nullable" - - name : "txwithldsrc" - type : "integer" - description : "Tax withheld at source" - mode : "nullable" - - name : "txpaidf2758" - type : "integer" - description : "Tax paid with Form 2758 (filing extension)" - mode : "nullable" - - name : "erronbkupwthld" - type : "integer" - description : "Erroneous backup withholding credit amount" - mode : "nullable" - - name : "estpnlty" - type : "integer" - description : "Estimated tax penalty" - mode : "nullable" - - name : "taxdue" - type : "integer" - description : "Tax due" - mode : "nullable" - - name : "overpay" - type : "integer" - description : "Overpayment" - mode : "nullable" - - name : "crelamt" - type : "integer" - description : "Credit elect amount" - mode : "nullable" - - name : "infleg" - type : "string" - description : "Influence legislation?" 
- mode : "nullable" - - name : "actnotpr" - type : "string" - description : "Activities not previously reported?" - mode : "nullable" - - name : "chgnprvrptcd" - type : "string" - description : "Changes not previously reported?" - mode : "nullable" - - name : "filedf990tcd" - type : "string" - description : "Filed 990-T?" - mode : "nullable" - - name : "contractncd" - type : "string" - description : "Contraction?" - mode : "nullable" - - name : "furnishcpycd" - type : "string" - description : "Furnished copy to Attorney General?" - mode : "nullable" - - name : "claimstatcd" - type : "string" - description : "Claiming status?" - mode : "nullable" - - name : "cntrbtrstxyrcd" - type : "string" - description : "Substantial contributors?" - mode : "nullable" - - name : "distribdafcd" - type : "string" - description : "Distribution to donor advised fund with advisory privileges?" - mode : "nullable" - - name : "orgcmplypubcd" - type : "string" - description : "Comply with public inspection?" - mode : "nullable" - - name : "filedlf1041ind" - type : "string" - description : "Comply with public inspection?" - mode : "nullable" - - name : "propexchcd" - type : "string" - description : "Property exchange?" - mode : "nullable" - - name : "brwlndmnycd" - type : "string" - description : "Borrow lend money?" - mode : "nullable" - - name : "furngoodscd" - type : "string" - description : "Furnished goods?" - mode : "nullable" - - name : "paidcmpncd" - type : "string" - description : "Paid compensation?" - mode : "nullable" - - name : "transfercd" - type : "string" - description : "Transfer?" - mode : "nullable" - - name : "agremkpaycd" - type : "string" - description : "Agree to make pay?" - mode : "nullable" - - name : "exceptactsind" - type : "string" - description : "Acts fail to qualify under section 53.4941(d)-3?" - mode : "nullable" - - name : "prioractvcd" - type : "string" - description : "Engage in acts in prior year?" 
- mode : "nullable" - - name : "undistrinccd" - type : "string" - description : "Undistributed income?" - mode : "nullable" - - name : "applyprovind" - type : "string" - description : "Not applying section 4942(a)(2) provisions?" - mode : "nullable" - - name : "dirindirintcd" - type : "string" - description : "Direct indirect interest?" - mode : "nullable" - - name : "excesshldcd" - type : "string" - description : "Excess business holdings?" - mode : "nullable" - - name : "invstjexmptcd" - type : "string" - description : "Jeopardizing investments?" - mode : "nullable" - - name : "prevjexmptcd" - type : "string" - description : "Prior year jeopardizing investments?" - mode : "nullable" - - name : "propgndacd" - type : "string" - description : "Propaganda?" - mode : "nullable" - - name : "ipubelectcd" - type : "string" - description : "Influence public election?" - mode : "nullable" - - name : "grntindivcd" - type : "string" - description : "Grant individual?" - mode : "nullable" - - name : "nchrtygrntcd" - type : "string" - description : "Non-charity grant?" - mode : "nullable" - - name : "nreligiouscd" - type : "string" - description : "Non-religious?" - mode : "nullable" - - name : "excptransind" - type : "string" - description : "Transactions fail to qualify under section 53.4945?" - mode : "nullable" - - name : "rfprsnlbnftind" - type : "string" - description : "Receive funds to pay premiums on personal benefit contract?" - mode : "nullable" - - name : "pyprsnlbnftind" - type : "string" - description : "Pay premiums on personal benefit contract?" 
- mode : "nullable" - - name : "tfairmrktunuse" - type : "integer" - description : "Fair market value of assets not used for charitable purposes" - mode : "nullable" - - name : "valncharitassets" - type : "integer" - description : "Net value of noncharitable-use assets" - mode : "nullable" - - name : "cmpmininvstret" - type : "integer" - description : "Minimum investment return" - mode : "nullable" - - name : "distribamt" - type : "integer" - description : "Distributable amount" - mode : "nullable" - - name : "undistribincyr" - type : "integer" - description : "Undistributed income" - mode : "nullable" - - name : "adjnetinccola" - type : "integer" - description : "Adjusted net income column a" - mode : "nullable" - - name : "adjnetinccolb" - type : "integer" - description : "Adjusted net income column b" - mode : "nullable" - - name : "adjnetinccolc" - type : "integer" - description : "Adjusted net income column c" - mode : "nullable" - - name : "adjnetinccold" - type : "integer" - description : "Adjusted net income column d" - mode : "nullable" - - name : "adjnetinctot" - type : "integer" - description : "Adjusted net income total" - mode : "nullable" - - name : "qlfydistriba" - type : "integer" - description : "Qualifying distributions column a" - mode : "nullable" - - name : "qlfydistribb" - type : "integer" - description : "Qualifying distributions column b" - mode : "nullable" - - name : "qlfydistribc" - type : "integer" - description : "Qualifying distributions column c" - mode : "nullable" - - name : "qlfydistribd" - type : "integer" - description : "Qualifying distributions column d" - mode : "nullable" - - name : "qlfydistribtot" - type : "integer" - description : "Qualifying distributions total" - mode : "nullable" - - name : "valassetscola" - type : "integer" - description : "Value assets column a" - mode : "nullable" - - name : "valassetscolb" - type : "integer" - description : "Value assets column b" - mode : "nullable" - - name : "valassetscolc" - 
type : "integer" - description : "Value assets column c" - mode : "nullable" - - name : "valassetscold" - type : "integer" - description : "Value assets column d" - mode : "nullable" - - name : "valassetstot" - type : "integer" - description : "Value assets total" - mode : "nullable" - - name : "qlfyasseta" - type : "integer" - description : "Qualifying assets column a" - mode : "nullable" - - name : "qlfyassetb" - type : "integer" - description : "Qualifying assets column b" - mode : "nullable" - - name : "qlfyassetc" - type : "integer" - description : "Qualifying assets column c" - mode : "nullable" - - name : "qlfyassetd" - type : "integer" - description : "Qualifying assets column d" - mode : "nullable" - - name : "qlfyassettot" - type : "integer" - description : "Qualifying assets total" - mode : "nullable" - - name : "endwmntscola" - type : "integer" - description : "Endowments column a" - mode : "nullable" - - name : "endwmntscolb" - type : "integer" - description : "Endowments column b" - mode : "nullable" - - name : "endwmntscolc" - type : "integer" - description : "Endowments column c" - mode : "nullable" - - name : "endwmntscold" - type : "integer" - description : "Endowments column d" - mode : "nullable" - - name : "endwmntstot" - type : "integer" - description : "Endowments total" - mode : "nullable" - - name : "totsuprtcola" - type : "integer" - description : "Total support column a" - mode : "nullable" - - name : "totsuprtcolb" - type : "integer" - description : "Total support column b" - mode : "nullable" - - name : "totsuprtcolc" - type : "integer" - description : "Total support column c" - mode : "nullable" - - name : "totsuprtcold" - type : "integer" - description : "Total support column d" - mode : "nullable" - - name : "totsuprttot" - type : "integer" - description : "Total support total" - mode : "nullable" - - name : "pubsuprtcola" - type : "integer" - description : "Public support column a" - mode : "nullable" - - name : "pubsuprtcolb" - 
type : "integer" - description : "Public support column b" - mode : "nullable" - - name : "pubsuprtcolc" - type : "integer" - description : "Public support column c" - mode : "nullable" - - name : "pubsuprtcold" - type : "integer" - description : "Public support column d" - mode : "nullable" - - name : "pubsuprttot" - type : "integer" - description : "Public support total" - mode : "nullable" - - name : "grsinvstinca" - type : "integer" - description : "Gross investment income column a" - mode : "nullable" - - name : "grsinvstincb" - type : "integer" - description : "Gross investment income column b" - mode : "nullable" - - name : "grsinvstincc" - type : "integer" - description : "Gross investment income column c" - mode : "nullable" - - name : "grsinvstincd" - type : "integer" - description : "Gross investment income column d" - mode : "nullable" - - name : "grsinvstinctot" - type : "integer" - description : "Gross investment income total" - mode : "nullable" - - name : "grntapprvfut" - type : "integer" - description : "Grants approved for future payment" - mode : "nullable" - - name : "progsrvcacold" - type : "integer" - description : "Program service revenue line 1a (excluded)" - mode : "nullable" - - name : "progsrvcacole" - type : "integer" - description : "Program service revenue line 1a (exempt)" - mode : "nullable" - - name : "progsrvcbcold" - type : "integer" - description : "Program service revenue line 1b (excluded)" - mode : "nullable" - - name : "progsrvcbcole" - type : "integer" - description : "Program service revenue line 1b (exempt)" - mode : "nullable" - - name : "progsrvcccold" - type : "integer" - description : "Program service revenue line 1c (excluded)" - mode : "nullable" - - name : "progsrvcccole" - type : "integer" - description : "Program service revenue line 1c (exempt)" - mode : "nullable" - - name : "progsrvcdcold" - type : "integer" - description : "Program service revenue line 1d (excluded)" - mode : "nullable" - - name : 
"progsrvcdcole" - type : "integer" - description : "Program service revenue line 1d (exempt)" - mode : "nullable" - - name : "progsrvcecold" - type : "integer" - description : "Program service revenue line 1e (excluded)" - mode : "nullable" - - name : "progsrvcecole" - type : "integer" - description : "Program service revenue line 1e (exempt)" - mode : "nullable" - - name : "progsrvcfcold" - type : "integer" - description : "Program service revenue line 1f (excluded)" - mode : "nullable" - - name : "progsrvcfcole" - type : "integer" - description : "Program service revenue line 1f (exempt)" - mode : "nullable" - - name : "progsrvcgcold" - type : "integer" - description : "Program service revenue--fees and contracts from government line 1g (excluded)" - mode : "nullable" - - name : "progsrvcgcole" - type : "integer" - description : "Program service revenue--fees and contracts from government line 1g (exempt)" - mode : "nullable" - - name : "membershpduesd" - type : "integer" - description : "Membership dues and assessments (excluded)" - mode : "nullable" - - name : "membershpduese" - type : "integer" - description : "Membership dues and assessments (exempt)" - mode : "nullable" - - name : "intonsvngsd" - type : "integer" - description : "Interest on savings and temporary cash investments (excluded)" - mode : "nullable" - - name : "intonsvngse" - type : "integer" - description : "Interest on savings and temporary cash investments (exempt)" - mode : "nullable" - - name : "dvdndsintd" - type : "integer" - description : "Dividends and interest from securities (excluded)" - mode : "nullable" - - name : "dvdndsinte" - type : "integer" - description : "Dividends and interest from securities (exempt)" - mode : "nullable" - - name : "trnsfrcashcd" - type : "string" - description : "Transfer cash to noncharitable exempt organization?" - mode : "nullable" - - name : "trnsothasstscd" - type : "string" - description : "Transfer other assets to noncharitable exempt organization?" 
- mode : "nullable" - - name : "salesasstscd" - type : "string" - description : "Sale of assets to noncharitable exempt organization?" - mode : "nullable" - - name : "prchsasstscd" - type : "string" - description : "Purchase of assets from noncharitable exempt organization?" - mode : "nullable" - - name : "rentlsfacltscd" - type : "string" - description : "Rental of facilities or other assets?" - mode : "nullable" - - name : "reimbrsmntscd" - type : "string" - description : "Reimbursements arrangements?" - mode : "nullable" - - name : "loansguarcd" - type : "string" - description : "Loans or other guarantees?" - mode : "nullable" - - name : "perfservicescd" - type : "string" - description : "Performance of services or membership or fundraising solicitations?" - mode : "nullable" - - name : "sharngasstscd" - type : "string" - description : "Sharing of facilities equipment mailing lists other assets or paid employees?" - mode : "nullable" + - name: "ein" + type: "string" + description: "Employer Identification Number" + mode: "required" + - name: "elf" + type: "string" + description: "E-file indicator" + mode: "nullable" + - name: "tax_prd" + type: "string" + description: "Tax period (YYYYMM format)" + mode: "nullable" + - name: "eostatus" + type: "string" + description: "EO Status Code" + mode: "nullable" + - name: "tax_yr" + type: "integer" + description: "SOI Year" + mode: "nullable" + - name: "operatingcd" + type: "string" + description: "Operating foundation code" + mode: "nullable" + - name: "subcd" + type: "string" + description: "Subsection code" + mode: "nullable" + - name: "fairmrktvalamt" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "grscontrgifts" + type: "integer" + description: "Contributions received" + mode: "nullable" + - name: "schedbind" + type: "string" + description: "Schedule B indicator" + mode: "nullable" + - name: "intrstrvnue" + type: "integer" + description: "Interest revenue" + mode:
"nullable" + - name: "dividndsamt" + type: "integer" + description: "" + mode: "nullable" + - name: "grsrents" + type: "integer" + description: "Gross rents" + mode: "nullable" + - name: "grsslspramt" + type: "integer" + description: "Gross sales price for assets" + mode: "nullable" + - name: "costsold" + type: "integer" + description: "Cost-of-goods-sold" + mode: "nullable" + - name: "grsprofitbus" + type: "integer" + description: "Gross profit" + mode: "nullable" + - name: "otherincamt" + type: "integer" + description: "Other income" + mode: "nullable" + - name: "totrcptperbks" + type: "integer" + description: "Total revenue" + mode: "nullable" + - name: "compofficers" + type: "integer" + description: "Compensation of officers" + mode: "nullable" + - name: "pensplemplbenf" + type: "integer" + description: "Pension plans employee benefits" + mode: "nullable" + - name: "legalfeesamt" + type: "integer" + description: "Legal fees" + mode: "nullable" + - name: "accountingfees" + type: "integer" + description: "Accounting fees" + mode: "nullable" + - name: "interestamt" + type: "integer" + description: "Interest" + mode: "nullable" + - name: "depreciationamt" + type: "integer" + description: "Depreciation and depletion" + mode: "nullable" + - name: "occupancyamt" + type: "integer" + description: "Occupancy" + mode: "nullable" + - name: "travlconfmtngs" + type: "integer" + description: "Travel conferences and meetings" + mode: "nullable" + - name: "printingpubl" + type: "integer" + description: "Printing and publications" + mode: "nullable" + - name: "topradmnexpnsa" + type: "integer" + description: "Total operating and administrative expenses column a" + mode: "nullable" + - name: "contrpdpbks" + type: "integer" + description: "Contributions gifts grants paid" + mode: "nullable" + - name: "totexpnspbks" + type: "integer" + description: "Total expenses" + mode: "nullable" + - name: "excessrcpts" + type: "integer" + description: "Net income less deficit" + mode: 
"nullable" + - name: "totrcptnetinc" + type: "integer" + description: "Total receipts net investment income" + mode: "nullable" + - name: "topradmnexpnsb" + type: "integer" + description: "Total operating and administrative expenses column b" + mode: "nullable" + - name: "totexpnsnetinc" + type: "integer" + description: "Total expenses net investment income" + mode: "nullable" + - name: "netinvstinc" + type: "integer" + description: "Net investment income" + mode: "nullable" + - name: "trcptadjnetinc" + type: "integer" + description: "Total receipts adjusted net income" + mode: "nullable" + - name: "totexpnsadjnet" + type: "integer" + description: "Total expenses adjusted net income" + mode: "nullable" + - name: "adjnetinc" + type: "integer" + description: "Adjusted net income" + mode: "nullable" + - name: "topradmnexpnsd" + type: "integer" + description: "Total operating and administrative expenses column d" + mode: "nullable" + - name: "totexpnsexempt" + type: "integer" + description: "Total expenses – exempt purpose" + mode: "nullable" + - name: "othrcashamt" + type: "integer" + description: "Cash non-interest-bearing – e-o-y book value" + mode: "nullable" + - name: "invstgovtoblig" + type: "integer" + description: "Investments in U.S. 
& state government obligations – e-o-y book value" + mode: "nullable" + - name: "invstcorpstk" + type: "integer" + description: "Investments in corporate stock – e-o-y book value" + mode: "nullable" + - name: "invstcorpbnd" + type: "integer" + description: "Investments in corporate bonds– e-o-y book value" + mode: "nullable" + - name: "totinvstsec" + type: "integer" + description: "Total investments in securities – e-o-y book value" + mode: "nullable" + - name: "mrtgloans" + type: "integer" + description: "Investments mortgage loans – e-o-y book value" + mode: "nullable" + - name: "othrinvstend" + type: "integer" + description: "Other investments – e-o-y book value" + mode: "nullable" + - name: "othrassetseoy" + type: "integer" + description: "Other assets – e-o-y book value" + mode: "nullable" + - name: "totassetsend" + type: "integer" + description: "Total assets – e-o-y book value" + mode: "nullable" + - name: "mrtgnotespay" + type: "integer" + description: "Mortgage loans payable – e-o-y book value" + mode: "nullable" + - name: "othrliabltseoy" + type: "integer" + description: "Other liabilities – e-o-y book value" + mode: "nullable" + - name: "totliabend" + type: "integer" + description: "Total liabilities – e-o-y book value" + mode: "nullable" + - name: "tfundnworth" + type: "integer" + description: "Total fund net worth – e-o-y book value" + mode: "nullable" + - name: "fairmrktvaleoy" + type: "integer" + description: "Total assets – e-o-y fair market value" + mode: "nullable" + - name: "totexcapgnls" + type: "integer" + description: "Capital gain net income" + mode: "nullable" + - name: "totexcapgn" + type: "integer" + description: "Net gain – sales of assets" + mode: "nullable" + - name: "totexcapls" + type: "integer" + description: "Net loss – sales of assets" + mode: "nullable" + - name: "invstexcisetx" + type: "integer" + description: "Excise tax on net investment income" + mode: "nullable" + - name: "sec4940notxcd" + type: "string" + description: 
"Section 4940 – no tax" + mode: "nullable" + - name: "sec4940redtxcd" + type: "string" + description: "Section 4940 – 1 % tax" + mode: "nullable" + - name: "sect511tx" + type: "integer" + description: "Section 511 tax" + mode: "nullable" + - name: "subtitleatx" + type: "integer" + description: "Subtitle A tax" + mode: "nullable" + - name: "totaxpyr" + type: "integer" + description: "Total excise tax" + mode: "nullable" + - name: "esttaxcr" + type: "integer" + description: "Estimated tax credit" + mode: "nullable" + - name: "txwithldsrc" + type: "integer" + description: "Tax withheld at source" + mode: "nullable" + - name: "txpaidf2758" + type: "integer" + description: "Tax paid with Form 2758 (filing extension)" + mode: "nullable" + - name: "erronbkupwthld" + type: "integer" + description: "Erroneous backup withholding credit amount" + mode: "nullable" + - name: "estpnlty" + type: "integer" + description: "Estimated tax penalty" + mode: "nullable" + - name: "taxdue" + type: "integer" + description: "Tax due" + mode: "nullable" + - name: "overpay" + type: "integer" + description: "Overpayment" + mode: "nullable" + - name: "crelamt" + type: "integer" + description: "Credit elect amount" + mode: "nullable" + - name: "infleg" + type: "string" + description: "Influence legislation?" + mode: "nullable" + - name: "actnotpr" + type: "string" + description: "Activities not previously reported?" + mode: "nullable" + - name: "chgnprvrptcd" + type: "string" + description: "Changes not previously reported?" + mode: "nullable" + - name: "filedf990tcd" + type: "string" + description: "Filed 990-T?" + mode: "nullable" + - name: "contractncd" + type: "string" + description: "Contraction?" + mode: "nullable" + - name: "furnishcpycd" + type: "string" + description: "Furnished copy to Attorney General?" + mode: "nullable" + - name: "claimstatcd" + type: "string" + description: "Claiming status?" 
+ mode: "nullable" + - name: "cntrbtrstxyrcd" + type: "string" + description: "Substantial contributors?" + mode: "nullable" + - name: "distribdafcd" + type: "string" + description: "Distribution to donor advised fund with advisory privileges?" + mode: "nullable" + - name: "orgcmplypubcd" + type: "string" + description: "Comply with public inspection?" + mode: "nullable" + - name: "filedlf1041ind" + type: "string" + description: "Filed Form 1041?" + mode: "nullable" + - name: "propexchcd" + type: "string" + description: "Property exchange?" + mode: "nullable" + - name: "brwlndmnycd" + type: "string" + description: "Borrow lend money?" + mode: "nullable" + - name: "furngoodscd" + type: "string" + description: "Furnished goods?" + mode: "nullable" + - name: "paidcmpncd" + type: "string" + description: "Paid compensation?" + mode: "nullable" + - name: "transfercd" + type: "string" + description: "Transfer?" + mode: "nullable" + - name: "agremkpaycd" + type: "string" + description: "Agree to make pay?" + mode: "nullable" + - name: "exceptactsind" + type: "string" + description: "Acts fail to qualify under section 53.4941(d)-3?" + mode: "nullable" + - name: "prioractvcd" + type: "string" + description: "Engage in acts in prior year?" + mode: "nullable" + - name: "undistrinccd" + type: "string" + description: "Undistributed income?" + mode: "nullable" + - name: "applyprovind" + type: "string" + description: "Not applying section 4942(a)(2) provisions?" + mode: "nullable" + - name: "dirindirintcd" + type: "string" + description: "Direct indirect interest?" + mode: "nullable" + - name: "excesshldcd" + type: "string" + description: "Excess business holdings?" + mode: "nullable" + - name: "invstjexmptcd" + type: "string" + description: "Jeopardizing investments?" + mode: "nullable" + - name: "prevjexmptcd" + type: "string" + description: "Prior year jeopardizing investments?"
+ mode: "nullable" + - name: "propgndacd" + type: "string" + description: "Propaganda?" + mode: "nullable" + - name: "ipubelectcd" + type: "string" + description: "Influence public election?" + mode: "nullable" + - name: "grntindivcd" + type: "string" + description: "Grant individual?" + mode: "nullable" + - name: "nchrtygrntcd" + type: "string" + description: "Non-charity grant?" + mode: "nullable" + - name: "nreligiouscd" + type: "string" + description: "Non-religious?" + mode: "nullable" + - name: "excptransind" + type: "string" + description: "Transactions fail to qualify under section 53.4945?" + mode: "nullable" + - name: "rfprsnlbnftind" + type: "string" + description: "Receive funds to pay premiums on personal benefit contract?" + mode: "nullable" + - name: "pyprsnlbnftind" + type: "string" + description: "Pay premiums on personal benefit contract?" + mode: "nullable" + - name: "tfairmrktunuse" + type: "integer" + description: "Fair market value of assets not used for charitable purposes" + mode: "nullable" + - name: "valncharitassets" + type: "integer" + description: "Net value of noncharitable-use assets" + mode: "nullable" + - name: "cmpmininvstret" + type: "integer" + description: "Minimum investment return" + mode: "nullable" + - name: "distribamt" + type: "integer" + description: "Distributable amount" + mode: "nullable" + - name: "undistribincyr" + type: "integer" + description: "Undistributed income" + mode: "nullable" + - name: "adjnetinccola" + type: "integer" + description: "Adjusted net income column a" + mode: "nullable" + - name: "adjnetinccolb" + type: "integer" + description: "Adjusted net income column b" + mode: "nullable" + - name: "adjnetinccolc" + type: "integer" + description: "Adjusted net income column c" + mode: "nullable" + - name: "adjnetinccold" + type: "integer" + description: "Adjusted net income column d" + mode: "nullable" + - name: "adjnetinctot" + type: "integer" + description: "Adjusted net income total" + mode: "nullable" 
+ - name: "qlfydistriba" + type: "integer" + description: "Qualifying distributions column a" + mode: "nullable" + - name: "qlfydistribb" + type: "integer" + description: "Qualifying distributions column b" + mode: "nullable" + - name: "qlfydistribc" + type: "integer" + description: "Qualifying distributions column c" + mode: "nullable" + - name: "qlfydistribd" + type: "integer" + description: "Qualifying distributions column d" + mode: "nullable" + - name: "qlfydistribtot" + type: "integer" + description: "Qualifying distributions total" + mode: "nullable" + - name: "valassetscola" + type: "integer" + description: "Value assets column a" + mode: "nullable" + - name: "valassetscolb" + type: "integer" + description: "Value assets column b" + mode: "nullable" + - name: "valassetscolc" + type: "integer" + description: "Value assets column c" + mode: "nullable" + - name: "valassetscold" + type: "integer" + description: "Value assets column d" + mode: "nullable" + - name: "valassetstot" + type: "integer" + description: "Value assets total" + mode: "nullable" + - name: "qlfyasseta" + type: "integer" + description: "Qualifying assets column a" + mode: "nullable" + - name: "qlfyassetb" + type: "integer" + description: "Qualifying assets column b" + mode: "nullable" + - name: "qlfyassetc" + type: "integer" + description: "Qualifying assets column c" + mode: "nullable" + - name: "qlfyassetd" + type: "integer" + description: "Qualifying assets column d" + mode: "nullable" + - name: "qlfyassettot" + type: "integer" + description: "Qualifying assets total" + mode: "nullable" + - name: "endwmntscola" + type: "integer" + description: "Endowments column a" + mode: "nullable" + - name: "endwmntscolb" + type: "integer" + description: "Endowments column b" + mode: "nullable" + - name: "endwmntscolc" + type: "integer" + description: "Endowments column c" + mode: "nullable" + - name: "endwmntscold" + type: "integer" + description: "Endowments column d" + mode: "nullable" + - name: 
"endwmntstot" + type: "integer" + description: "Endowments total" + mode: "nullable" + - name: "totsuprtcola" + type: "integer" + description: "Total support column a" + mode: "nullable" + - name: "totsuprtcolb" + type: "integer" + description: "Total support column b" + mode: "nullable" + - name: "totsuprtcolc" + type: "integer" + description: "Total support column c" + mode: "nullable" + - name: "totsuprtcold" + type: "integer" + description: "Total support column d" + mode: "nullable" + - name: "totsuprttot" + type: "integer" + description: "Total support total" + mode: "nullable" + - name: "pubsuprtcola" + type: "integer" + description: "Public support column a" + mode: "nullable" + - name: "pubsuprtcolb" + type: "integer" + description: "Public support column b" + mode: "nullable" + - name: "pubsuprtcolc" + type: "integer" + description: "Public support column c" + mode: "nullable" + - name: "pubsuprtcold" + type: "integer" + description: "Public support column d" + mode: "nullable" + - name: "pubsuprttot" + type: "integer" + description: "Public support total" + mode: "nullable" + - name: "grsinvstinca" + type: "integer" + description: "Gross investment income column a" + mode: "nullable" + - name: "grsinvstincb" + type: "integer" + description: "Gross investment income column b" + mode: "nullable" + - name: "grsinvstincc" + type: "integer" + description: "Gross investment income column c" + mode: "nullable" + - name: "grsinvstincd" + type: "integer" + description: "Gross investment income column d" + mode: "nullable" + - name: "grsinvstinctot" + type: "integer" + description: "Gross investment income total" + mode: "nullable" + - name: "grntapprvfut" + type: "integer" + description: "Grants approved for future payment" + mode: "nullable" + - name: "progsrvcacold" + type: "integer" + description: "Program service revenue line 1a (excluded)" + mode: "nullable" + - name: "progsrvcacole" + type: "integer" + description: "Program service revenue line 1a (exempt)" 
+ mode: "nullable" + - name: "progsrvcbcold" + type: "integer" + description: "Program service revenue line 1b (excluded)" + mode: "nullable" + - name: "progsrvcbcole" + type: "integer" + description: "Program service revenue line 1b (exempt)" + mode: "nullable" + - name: "progsrvcccold" + type: "integer" + description: "Program service revenue line 1c (excluded)" + mode: "nullable" + - name: "progsrvcccole" + type: "integer" + description: "Program service revenue line 1c (exempt)" + mode: "nullable" + - name: "progsrvcdcold" + type: "integer" + description: "Program service revenue line 1d (excluded)" + mode: "nullable" + - name: "progsrvcdcole" + type: "integer" + description: "Program service revenue line 1d (exempt)" + mode: "nullable" + - name: "progsrvcecold" + type: "integer" + description: "Program service revenue line 1e (excluded)" + mode: "nullable" + - name: "progsrvcecole" + type: "integer" + description: "Program service revenue line 1e (exempt)" + mode: "nullable" + - name: "progsrvcfcold" + type: "integer" + description: "Program service revenue line 1f (excluded)" + mode: "nullable" + - name: "progsrvcfcole" + type: "integer" + description: "Program service revenue line 1f (exempt)" + mode: "nullable" + - name: "progsrvcgcold" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (excluded)" + mode: "nullable" + - name: "progsrvcgcole" + type: "integer" + description: "Program service revenue--fees and contracts from government line 1g (exempt)" + mode: "nullable" + - name: "membershpduesd" + type: "integer" + description: "Membership dues and assessments (excluded)" + mode: "nullable" + - name: "membershpduese" + type: "integer" + description: "Membership dues and assessments (exempt)" + mode: "nullable" + - name: "intonsvngsd" + type: "integer" + description: "Interest on savings and temporary cash investments (excluded)" + mode: "nullable" + - name: "intonsvngse" + type: "integer" + description: 
"Interest on savings and temporary cash investments (exempt)" + mode: "nullable" + - name: "dvdndsintd" + type: "integer" + description: "Dividends and interest from securities (excluded)" + mode: "nullable" + - name: "dvdndsinte" + type: "integer" + description: "Dividends and interest from securities (exempt)" + mode: "nullable" + - name: "trnsfrcashcd" + type: "string" + description: "Transfer cash to noncharitable exempt organization?" + mode: "nullable" + - name: "trnsothasstscd" + type: "string" + description: "Transfer other assets to noncharitable exempt organization?" + mode: "nullable" + - name: "salesasstscd" + type: "string" + description: "Sale of assets to noncharitable exempt organization?" + mode: "nullable" + - name: "prchsasstscd" + type: "string" + description: "Purchase of assets from noncharitable exempt organization?" + mode: "nullable" + - name: "rentlsfacltscd" + type: "string" + description: "Rental of facilities or other assets?" + mode: "nullable" + - name: "reimbrsmntscd" + type: "string" + description: "Reimbursements arrangements?" + mode: "nullable" + - name: "loansguarcd" + type: "string" + description: "Loans or other guarantees?" + mode: "nullable" + - name: "perfservicescd" + type: "string" + description: "Performance of services or membership or fundraising solicitations?" + mode: "nullable" + - name: "sharngasstscd" + type: "string" + description: "Sharing of facilities equipment mailing lists other assets or paid employees?" 
+ mode: "nullable" graph_paths: - "irs_990_pf_2016_transform_csv >> load_irs_990_pf_2016_to_bq" - - - - - \ No newline at end of file From c776161d3292fd5c684ea5779c171c004d17b633 Mon Sep 17 00:00:00 2001 From: Dipannita Banerjee Date: Fri, 27 Aug 2021 06:02:18 +0000 Subject: [PATCH 4/4] feat: change in terraform files --- datasets/irs_990/_terraform/irs_990_2014_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_2015_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_2016_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_2017_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf | 2 +- datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf index 88937f00f..6f1027172 100644 --- a/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_2014_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_2014" { dataset_id = "irs_990" table_id = "irs_990_2014" - description = "irs_990 2014 dataset" + description = "IRS 990 2014 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf index 3ee8b4c57..bb52f2ec2 100644 --- a/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_2015_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_2015" { dataset_id = "irs_990" table_id = "irs_990_2015" - description = "irs_990 2015 dataset" + description = "IRS 990 2015 dataset" diff 
--git a/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf index d88ea75b8..9a98af73f 100644 --- a/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_2016_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_2016" { dataset_id = "irs_990" table_id = "irs_990_2016" - description = "irs_990_2016 dataset" + description = "IRS 990 2016 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf b/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf index 7be2429de..77dce805e 100644 --- a/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_2017_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_2017" { dataset_id = "irs_990" table_id = "irs_990_2017" - description = "irs_990_2017 dataset" + description = "IRS 990 2017 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf index 3cae56f31..e41c19b45 100644 --- a/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_ez_2014_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_ez_2014" { dataset_id = "irs_990" table_id = "irs_990_ez_2014" - description = "irs_990_ez_2014 dataset" + description = "IRS 990 EZ 2014 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf index 3c50c8715..6ec430986 100644 --- a/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_ez_2015_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_ez_2015" { dataset_id = "irs_990" table_id = "irs_990_ez_2015" - description = "irs_990_ez_2015 dataset" + description = "IRS 990 EZ 2015 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf 
b/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf index 46b95047f..9ce37db35 100644 --- a/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_ez_2016_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_ez_2016" { dataset_id = "irs_990" table_id = "irs_990_ez_2016" - description = "irs_990_ez_2016 dataset" + description = "IRS 990 EZ 2016 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf b/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf index 74a660625..81fbc1ae8 100644 --- a/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_ez_2017_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_ez_2017" { dataset_id = "irs_990" table_id = "irs_990_ez_2017" - description = "irs_990_ez_2017 dataset" + description = "IRS 990 EZ 2017 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf b/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf index 5cf796be4..34b7d2bb3 100644 --- a/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_pf_2014_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_pf_2014" { dataset_id = "irs_990" table_id = "irs_990_pf_2014" - description = "irs_990_pf_2014 dataset" + description = "IRS 990 PF 2014 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf b/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf index eac0f9ae4..f8ed21abd 100644 --- a/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_pf_2015_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_pf_2015" { dataset_id = "irs_990" table_id = "irs_990_pf_2015" - description = "irs_990_pf_2015 dataset" + description = "IRS 990 PF 2015 dataset" diff --git a/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf 
b/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf index e1b45d250..99c885e12 100644 --- a/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf +++ b/datasets/irs_990/_terraform/irs_990_pf_2016_pipeline.tf @@ -20,7 +20,7 @@ resource "google_bigquery_table" "irs_990_pf_2016" { dataset_id = "irs_990" table_id = "irs_990_pf_2016" - description = "irs_990_pf_2016 dataset" + description = "IRS 990 PF 2016 dataset"