diff --git a/datasets/chicago_crime/_images/run_csv_transform_kub/Dockerfile b/datasets/chicago_crime/_images/run_csv_transform_kub/Dockerfile
new file mode 100644
index 000000000..7265a1b71
--- /dev/null
+++ b/datasets/chicago_crime/_images/run_csv_transform_kub/Dockerfile
@@ -0,0 +1,37 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The base image for this build
+FROM python:3.8
+
+# Allow statements and log messages to appear in Cloud logs
+ENV PYTHONUNBUFFERED=True
+
+# Copy the requirements file into the image
+COPY requirements.txt ./
+
+# Install the packages specified in the requirements file
+RUN python3 -m pip install --no-cache-dir -r requirements.txt
+
+# The WORKDIR instruction sets the working directory for any RUN, CMD,
+# ENTRYPOINT, COPY and ADD instructions that follow it in the Dockerfile.
+# If the WORKDIR doesn’t exist, it will be created even if it’s not used in
+# any subsequent Dockerfile instruction
+WORKDIR /custom
+
+# Copy the specific data processing script/s in the image under /custom/*
+COPY ./csv_transform.py .
+
+# Command to run the data processing script when the container is run
+CMD ["python3", "csv_transform.py"]
diff --git a/datasets/chicago_crime/_images/run_csv_transform_kub/csv_transform.py b/datasets/chicago_crime/_images/run_csv_transform_kub/csv_transform.py
new file mode 100644
index 000000000..d7a9f9410
--- /dev/null
+++ b/datasets/chicago_crime/_images/run_csv_transform_kub/csv_transform.py
@@ -0,0 +1,328 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import datetime
+import logging
+import math
+import os
+import pathlib
+import subprocess
+import typing
+
+import pandas as pd
+import requests
+from google.cloud import storage
+
+
+def main(
+    source_url: str,
+    source_file: pathlib.Path,
+    target_file: pathlib.Path,
+    target_gcs_bucket: str,
+    target_gcs_path: str,
+    chunk_size: str,
+) -> None:
+    """Download the Chicago crime CSV, clean it in chunks and upload it to GCS.
+
+    Args:
+        source_url: URL of the raw CSV export.
+        source_file: Local path the raw CSV is downloaded to.
+        target_file: Local path of the merged, transformed CSV.
+        target_gcs_bucket: Name of the destination GCS bucket.
+        target_gcs_path: Object name of the upload inside the bucket.
+        chunk_size: Number of CSV rows per pandas chunk, as a string.
+
+    Raises:
+        requests.HTTPError: If the source file cannot be downloaded.
+        subprocess.CalledProcessError: If a batch file cannot be merged.
+    """
+    logging.info(
+        "Chicago Crime process started at "
+        + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    )
+
+    logging.info("Creating 'files' folder")
+    pathlib.Path("./files").mkdir(parents=True, exist_ok=True)
+
+    logging.info(f"Downloading file {source_url}")
+    download_file(source_url, source_file)
+
+    target_path = pathlib.Path(target_file)
+    with pd.read_csv(
+        source_file,
+        chunksize=int(chunk_size),
+    ) as reader:
+        # Each chunk is a fresh DataFrame, so it is transformed in place.
+        for chunk_number, df in enumerate(reader):
+            logging.info(f"Processing batch {chunk_number}")
+            # Build "<stem>-<n><suffix>" from the file name only, so a ".csv"
+            # elsewhere in the path is left untouched.
+            target_file_batch = str(
+                target_path.with_name(
+                    f"{target_path.stem}-{chunk_number}{target_path.suffix}"
+                )
+            )
+
+            logging.info(f"Transforming {source_file} ...")
+
+            logging.info(f"Transform: Rename columns {source_file} ...")
+            rename_headers(df)
+
+            logging.info("Transform: Converting date format.. ")
+            convert_values(df)
+
+            logging.info("Transform: Removing null values.. ")
+            filter_null_rows(df)
+
+            logging.info("Transform: Converting to integers..")
+            convert_values_to_integer_string(df)
+
+            logging.info("Transform: Converting to float..")
+            removing_nan_values(df)
+
+            logging.info("Transform: Reordering headers..")
+            df = df[
+                [
+                    "unique_key",
+                    "case_number",
+                    "date",
+                    "block",
+                    "iucr",
+                    "primary_type",
+                    "description",
+                    "location_description",
+                    "arrest",
+                    "domestic",
+                    "beat",
+                    "district",
+                    "ward",
+                    "community_area",
+                    "fbi_code",
+                    "x_coordinate",
+                    "y_coordinate",
+                    "year",
+                    "updated_on",
+                    "latitude",
+                    "longitude",
+                    "location",
+                ]
+            ]
+
+            process_chunk(df, target_file_batch)
+
+            logging.info(f"Appending batch {chunk_number} to {target_file}")
+            _append_batch_file(target_file_batch, target_path, chunk_number == 0)
+
+    logging.info(
+        f"Uploading output file to.. gs://{target_gcs_bucket}/{target_gcs_path}"
+    )
+    upload_file_to_gcs(target_file, target_gcs_bucket, target_gcs_path)
+
+    logging.info(
+        "Chicago crime process completed at "
+        + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    )
+
+
+def _append_batch_file(
+    batch_file: str, target_file: pathlib.Path, is_first: bool
+) -> None:
+    """Merge one batch CSV into target_file, then delete the batch file.
+
+    The first batch is copied with its header row; later batches have the
+    header row removed before being appended. Commands run without a shell
+    and with check=True, so unusual paths are safe and failures raise
+    subprocess.CalledProcessError.
+    """
+    if is_first:
+        subprocess.run(["cp", batch_file, str(target_file)], check=True)
+    else:
+        subprocess.run(["sed", "-i", "1d", batch_file], check=True)
+        with open(target_file, "ab") as merged:
+            subprocess.run(["cat", batch_file], stdout=merged, check=True)
+    subprocess.run(["rm", batch_file], check=True)
+
+
+def process_chunk(df: pd.DataFrame, target_file_batch: str) -> None:
+    """Write one transformed chunk to target_file_batch.
+
+    Raises:
+        Exception: Any error from the CSV write is logged and re-raised so a
+            missing batch cannot silently truncate the merged output.
+    """
+    logging.info(f"Saving to output file.. {target_file_batch}")
+    try:
+        save_to_new_file(df, file_path=str(target_file_batch))
+    except Exception:
+        logging.exception(f"Error saving output file {target_file_batch}")
+        raise
+    logging.info("..Done!")
+
+
+def _is_missing(value: typing.Any) -> bool:
+    """Return True for None, NaN and empty or whitespace-only strings."""
+    if value is None:
+        return True
+    if isinstance(value, str):
+        return not value.strip()
+    try:
+        return math.isnan(value)
+    except TypeError:
+        return False
+
+
+def resolve_nan(input: typing.Union[str, float]) -> str:
+    """Return input as a string, or "" when it is missing.
+
+    Zero is a valid value and is kept ("0.0"), not blanked.
+    """
+    if _is_missing(input):
+        return ""
+    return str(input).replace("None", "")
+
+
+def removing_nan_values(df: pd.DataFrame) -> None:
+    """Stringify the coordinate columns in place, blanking missing values."""
+    for col in ("x_coordinate", "y_coordinate", "latitude", "longitude"):
+        df[col] = df[col].apply(resolve_nan)
+
+
+def convert_to_integer_string(input: typing.Union[str, float]) -> str:
+    """Return input rounded to an integer string, or "" when it is missing.
+
+    Numeric strings such as "12.0" are accepted; zero is kept as "0".
+    """
+    if _is_missing(input):
+        return ""
+    number = float(input) if isinstance(input, str) else input
+    return str(int(round(number, 0)))
+
+
+def convert_values_to_integer_string(df: pd.DataFrame) -> None:
+    """Convert the integer-valued columns to integer strings in place."""
+    cols = ["unique_key", "beat", "district", "ward", "community_area", "year"]
+
+    for col in cols:
+        df[col] = df[col].apply(convert_to_integer_string)
+
+
+def rename_headers(df: pd.DataFrame) -> None:
+    """Rename the source CSV headers to the target column names in place."""
+    header_names = {
+        "ID": "unique_key",
+        "Case Number": "case_number",
+        "Date": "date",
+        "Block": "block",
+        "IUCR": "iucr",
+        "Primary Type": "primary_type",
+        "Description": "description",
+        "Location Description": "location_description",
+        "Arrest": "arrest",
+        "Domestic": "domestic",
+        "Beat": "beat",
+        "District": "district",
+        "Ward": "ward",
+        "Community Area": "community_area",
+        "FBI Code": "fbi_code",
+        "X Coordinate": "x_coordinate",
+        "Y Coordinate": "y_coordinate",
+        "Year": "year",
+        "Updated On": "updated_on",
+        "Latitude": "latitude",
+        "Longitude": "longitude",
+        "Location": "location",
+    }
+
+    df.rename(columns=header_names, inplace=True)
+
+
+def convert_dt_format(dt_str: str) -> str:
+    """Convert "MM/dd/yyyy hh:mm:ss AM/PM" to "yyyy-MM-dd HH:mm:ss".
+
+    Missing values (None, NaN, empty string) yield "".
+    """
+    if _is_missing(dt_str):
+        return ""
+    # %I (12-hour clock) is required: with %H, strptime ignores %p and every
+    # PM time would be written as its AM equivalent.
+    return datetime.datetime.strptime(dt_str, "%m/%d/%Y %I:%M:%S %p").strftime(
+        "%Y-%m-%d %H:%M:%S"
+    )
+
+
+def convert_values(df: pd.DataFrame) -> None:
+    """Reformat the timestamp columns in place."""
+    dt_cols = ["date", "updated_on"]
+
+    for dt_col in dt_cols:
+        df[dt_col] = df[dt_col].apply(convert_dt_format)
+
+
+def filter_null_rows(df: pd.DataFrame) -> None:
+    """Drop rows with a missing unique_key in place.
+
+    Rows are dropped from the caller's frame; rebinding a local name would
+    leave it unchanged.
+    """
+    missing = df["unique_key"].map(_is_missing).astype(bool)
+    df.drop(index=df.index[missing.to_numpy()], inplace=True)
+
+
+def save_to_new_file(df: pd.DataFrame, file_path: pathlib.Path) -> None:
+    """Write df to file_path as CSV without the index column."""
+    df.to_csv(file_path, index=False)
+
+
+def download_file(source_url: str, source_file: pathlib.Path) -> None:
+    """Stream source_url into source_file.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        RuntimeError: If the server answers with any other non-200 status.
+    """
+    logging.info(f"Downloading {source_url} into {source_file}")
+    # (connect, read) timeouts keep a stalled connection from hanging the pod.
+    with requests.get(source_url, stream=True, timeout=(30, 300)) as r:
+        if r.status_code != 200:
+            logging.error(f"Couldn't download {source_url}: {r.text}")
+            r.raise_for_status()
+            raise RuntimeError(
+                f"Unexpected HTTP status {r.status_code} for {source_url}"
+            )
+        with open(source_file, "wb") as f:
+            # Iterating the response directly yields 128-byte chunks.
+            for chunk in r.iter_content(chunk_size=1024 * 1024):
+                f.write(chunk)
+
+
+def upload_file_to_gcs(file_path: pathlib.Path, gcs_bucket: str, gcs_path: str) -> None:
+    """Upload the local file_path to gs://gcs_bucket/gcs_path."""
+    storage_client = storage.Client()
+    bucket = storage_client.bucket(gcs_bucket)
+    blob = bucket.blob(gcs_path)
+    blob.upload_from_filename(str(file_path))
+
+
+if __name__ == "__main__":
+    logging.getLogger().setLevel(logging.INFO)
+
+    main(
+        source_url=os.environ["SOURCE_URL"],
+        source_file=pathlib.Path(os.environ["SOURCE_FILE"]).expanduser(),
+        target_file=pathlib.Path(os.environ["TARGET_FILE"]).expanduser(),
+        target_gcs_bucket=os.environ["TARGET_GCS_BUCKET"],
+        target_gcs_path=os.environ["TARGET_GCS_PATH"],
+        chunk_size=os.environ["CHUNK_SIZE"],
+    )
diff --git a/datasets/chicago_crime/_images/run_csv_transform_kub/requirements.txt b/datasets/chicago_crime/_images/run_csv_transform_kub/requirements.txt
new file mode 100644
index 000000000..f36704793
--- /dev/null
+++ b/datasets/chicago_crime/_images/run_csv_transform_kub/requirements.txt
@@ -0,0 +1,3 @@
+requests
+pandas
+google-cloud-storage
diff --git a/datasets/chicago_crime/_terraform/chicago_crime_dataset.tf b/datasets/chicago_crime/_terraform/chicago_crime_dataset.tf
new file mode 100644
index 000000000..9de937909
--- /dev/null
+++ b/datasets/chicago_crime/_terraform/chicago_crime_dataset.tf
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+resource "google_bigquery_dataset" "chicago_crime" {
+  dataset_id  = "chicago_crime"
+  project     = var.project_id
+  description = "This dataset reflects reported incidents of crime (with the exception of murders where data exists for each victim) that occurred in the City of Chicago from 2001 to present, minus the most recent seven days. Data is extracted from the Chicago Police Department\u0027s CLEAR (Citizen Law Enforcement Analysis and Reporting) system. In order to protect the privacy of crime victims, addresses are shown at the block level only and specific locations are not identified. This data includes unverified reports supplied to the Police Department. The preliminary crime classifications may be changed at a later date based upon additional investigation and there is always the possibility of mechanical or human error. Therefore, the Chicago Police Department does not guarantee (either expressed or implied) the accuracy, completeness, timeliness, or correct sequencing of the information and the information should not be used for comparison purposes over time.\n\nDataset Source: City of Chicago\n\nCategory: Chicago, Public Safety\n\nUse: This dataset is publicly available for anyone to use under the following terms provided by the Dataset Source \u2014https://data.cityofchicago.org \u2014 and is provided \"AS IS\" without any warranty, express or implied, from Google. Google disclaims all liability for any damages, direct or indirect, resulting from the use of the dataset.\n\nUpdate Frequency: Daily"
+}
+
+output "bigquery_dataset-chicago_crime-dataset_id" {
+  value = google_bigquery_dataset.chicago_crime.dataset_id
+}
diff --git a/datasets/chicago_crime/_terraform/crime_pipeline.tf b/datasets/chicago_crime/_terraform/crime_pipeline.tf
new file mode 100644
index 000000000..a76556314
--- /dev/null
+++ b/datasets/chicago_crime/_terraform/crime_pipeline.tf
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+resource "google_bigquery_table" "crime" {
+  project    = var.project_id
+  dataset_id = "chicago_crime"
+  table_id   = "crime"
+
+  description = "Chicago Crime dataset"
+
+
+
+
+  depends_on = [ # the dataset must exist before the table is created
+    google_bigquery_dataset.chicago_crime
+  ]
+}
+
+output "bigquery_table-crime-table_id" {
+  value = google_bigquery_table.crime.table_id
+}
+
+output "bigquery_table-crime-id" {
+  value = google_bigquery_table.crime.id
+}
diff --git a/datasets/chicago_crime/_terraform/provider.tf b/datasets/chicago_crime/_terraform/provider.tf
new file mode 100644
index 000000000..23ab87dcd
--- /dev/null
+++ b/datasets/chicago_crime/_terraform/provider.tf
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+provider "google" {
+  project                     = var.project_id
+  impersonate_service_account = var.impersonating_acct
+  region                      = var.region
+}
+
+data "google_client_openid_userinfo" "me" {} # NOTE(review): presumably the identity the provider authenticates as - confirm
+
+output "impersonating-account" {
+  value = data.google_client_openid_userinfo.me.email
+}
diff --git a/datasets/chicago_crime/_terraform/variables.tf b/datasets/chicago_crime/_terraform/variables.tf
new file mode 100644
index 000000000..c3ec7c506
--- /dev/null
+++ b/datasets/chicago_crime/_terraform/variables.tf
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+variable "project_id" {}
+variable "bucket_name_prefix" {} # not referenced by the .tf files in this diff
+variable "impersonating_acct" {}
+variable "region" {}
+variable "env" {} # not referenced by the .tf files in this diff
+
diff --git a/datasets/chicago_crime/crime/crime_dag.py b/datasets/chicago_crime/crime/crime_dag.py
new file mode 100644
index 000000000..97df85d90
--- /dev/null
+++ b/datasets/chicago_crime/crime/crime_dag.py
@@ -0,0 +1,106 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from airflow import DAG
+from airflow.contrib.operators import gcs_to_bq, kubernetes_pod_operator
+
+default_args = {
+    "owner": "Google",
+    "depends_on_past": False,
+    "start_date": "2021-03-01",
+}
+
+
+with DAG(
+    dag_id="chicago_crime.crime",
+    default_args=default_args,
+    max_active_runs=1,  # at most one active run of this DAG at a time
+    schedule_interval="@daily",
+    catchup=False,  # do not backfill runs between start_date and now
+    default_view="graph",
+) as dag:
+
+    # Run CSV transform within kubernetes pod
+    chicago_crime_transform_csv = kubernetes_pod_operator.KubernetesPodOperator(
+        task_id="chicago_crime_transform_csv",
+        startup_timeout_seconds=600,
+        name="crime",
+        namespace="default",
+        affinity={
+            "nodeAffinity": {
+                "requiredDuringSchedulingIgnoredDuringExecution": {
+                    "nodeSelectorTerms": [
+                        {
+                            "matchExpressions": [
+                                {
+                                    "key": "cloud.google.com/gke-nodepool",
+                                    "operator": "In",
+                                    "values": ["pool-e2-standard-4"],
+                                }
+                            ]
+                        }
+                    ]
+                }
+            }
+        },
+        image_pull_policy="Always",
+        image="{{ var.json.chicago_crime.container_registry.run_csv_transform_kub }}",
+        env_vars={
+            "SOURCE_URL": "https://data.cityofchicago.org/api/views/ijzp-q8t2/rows.csv",
+            "SOURCE_FILE": "files/data.csv",
+            "TARGET_FILE": "files/data_output.csv",
+            "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}",
+            "TARGET_GCS_PATH": "data/chicago_crime/crime/data_output.csv",
+            "CHUNK_SIZE": "1000000",  # CSV rows per pandas chunk in csv_transform.py
+        },
+        resources={"request_memory": "2G", "request_cpu": "1"},
+    )
+
+    # Task to load CSV data to a BigQuery table
+    load_chicago_crime_to_bq = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
+        task_id="load_chicago_crime_to_bq",
+        bucket="{{ var.value.composer_bucket }}",
+        source_objects=["data/chicago_crime/crime/data_output.csv"],
+        source_format="CSV",
+        destination_project_dataset_table="chicago_crime.crime",
+        skip_leading_rows=1,  # skip the CSV header row
+        write_disposition="WRITE_TRUNCATE",  # replace the table contents on every load
+        schema_fields=[
+            {"name": "unique_key", "type": "integer", "mode": "required"},
+            {"name": "case_number", "type": "string", "mode": "nullable"},
+            {"name": "date", "type": "timestamp", "mode": "nullable"},
+            {"name": "block", "type": "string", "mode": "nullable"},
+            {"name": "iucr", "type": "string", "mode": "nullable"},
+            {"name": "primary_type", "type": "string", "mode": "nullable"},
+            {"name": "description", "type": "string", "mode": "nullable"},
+            {"name": "location_description", "type": "string", "mode": "nullable"},
+            {"name": "arrest", "type": "boolean", "mode": "nullable"},
+            {"name": "domestic", "type": "boolean", "mode": "nullable"},
+            {"name": "beat", "type": "integer", "mode": "nullable"},
+            {"name": "district", "type": "integer", "mode": "nullable"},
+            {"name": "ward", "type": "integer", "mode": "nullable"},
+            {"name": "community_area", "type": "integer", "mode": "nullable"},
+            {"name": "fbi_code", "type": "string", "mode": "nullable"},
+            {"name": "x_coordinate", "type": "float", "mode": "nullable"},
+            {"name": "y_coordinate", "type": "float", "mode": "nullable"},
+            {"name": "year", "type": "integer"},  # no mode given, unlike the other fields
+            {"name": "updated_on", "type": "timestamp", "mode": "nullable"},
+            {"name": "latitude", "type": "float", "mode": "nullable"},
+            {"name": "longitude", "type": "float", "mode": "nullable"},
+            {"name": "location", "type": "string", "mode": "nullable"},
+        ],
+    )
+
+    chicago_crime_transform_csv >> load_chicago_crime_to_bq  # transform first, then load
diff --git a/datasets/chicago_crime/crime/pipeline.yaml b/datasets/chicago_crime/crime/pipeline.yaml
new file mode 100644
index 000000000..de08b4e57
--- /dev/null
+++ b/datasets/chicago_crime/crime/pipeline.yaml
@@ -0,0 +1,144 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+resources:
+
+  - type: bigquery_table
+    table_id: crime
+    description: "Chicago Crime dataset"
+
+dag:
+  airflow_version: 1
+  initialize:
+    dag_id: crime
+    default_args:
+      owner: "Google"
+      depends_on_past: False
+      start_date: '2021-03-01'
+    max_active_runs: 1  # at most one active run of this DAG at a time
+    schedule_interval: "@daily"
+    catchup: False  # do not backfill runs between start_date and now
+    default_view: graph
+
+  tasks:
+    - operator: "KubernetesPodOperator"
+      description: "Run CSV transform within kubernetes pod"
+      args:
+        task_id: "chicago_crime_transform_csv"
+        startup_timeout_seconds: 600
+        name: "crime"
+        namespace: "default"
+        affinity:
+          nodeAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+              nodeSelectorTerms:
+                - matchExpressions:
+                    - key: cloud.google.com/gke-nodepool
+                      operator: In
+                      values:
+                        - "pool-e2-standard-4"
+        image_pull_policy: "Always"
+        image: "{{ var.json.chicago_crime.container_registry.run_csv_transform_kub }}"
+        env_vars:
+          SOURCE_URL: "https://data.cityofchicago.org/api/views/ijzp-q8t2/rows.csv"
+          SOURCE_FILE: "files/data.csv"
+          TARGET_FILE: "files/data_output.csv"
+          TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
+          TARGET_GCS_PATH: "data/chicago_crime/crime/data_output.csv"
+          CHUNK_SIZE: "1000000"  # CSV rows per pandas chunk in csv_transform.py
+        resources:
+          request_memory: "2G"
+          request_cpu: "1"
+
+    - operator: "GoogleCloudStorageToBigQueryOperator"
+      description: "Task to load CSV data to a BigQuery table"
+      args:
+        task_id: "load_chicago_crime_to_bq"
+        bucket: "{{ var.value.composer_bucket }}"
+        source_objects: ["data/chicago_crime/crime/data_output.csv"]
+        source_format: "CSV"
+        destination_project_dataset_table: "chicago_crime.crime"
+        skip_leading_rows: 1  # skip the CSV header row
+        write_disposition: "WRITE_TRUNCATE"  # replace the table contents on every load
+
+        schema_fields:
+          - name: "unique_key"
+            type: "integer"
+            mode: "required"
+          - name: "case_number"
+            type: "string"
+            mode: "nullable"
+          - name: "date"
+            type: "timestamp"
+            mode: "nullable"
+          - name: "block"
+            type: "string"
+            mode: "nullable"
+          - name: "iucr"
+            type: "string"
+            mode: "nullable"
+          - name: "primary_type"
+            type: "string"
+            mode: "nullable"
+          - name: "description"
+            type: "string"
+            mode: "nullable"
+          - name: "location_description"
+            type: "string"
+            mode: "nullable"
+          - name: "arrest"
+            type: "boolean"
+            mode: "nullable"
+          - name: "domestic"
+            type: "boolean"
+            mode: "nullable"
+          - name: "beat"
+            type: "integer"
+            mode: "nullable"
+          - name: "district"
+            type: "integer"
+            mode: "nullable"
+          - name: "ward"
+            type: "integer"
+            mode: "nullable"
+          - name: "community_area"
+            type: "integer"
+            mode: "nullable"
+          - name: "fbi_code"
+            type: "string"
+            mode: "nullable"
+          - name: "x_coordinate"
+            type: "float"
+            mode: "nullable"
+          - name: "y_coordinate"
+            type: "float"
+            mode: "nullable"
+          - name: "year"
+            type: "integer"  # no mode given, unlike the other fields
+          - name: "updated_on"
+            type: "timestamp"
+            mode: "nullable"
+          - name: "latitude"
+            type: "float"
+            mode: "nullable"
+          - name: "longitude"
+            type: "float"
+            mode: "nullable"
+          - name: "location"
+            type: "string"
+            mode: "nullable"
+
+  graph_paths:
+    - "chicago_crime_transform_csv >> load_chicago_crime_to_bq"  # transform first, then load
diff --git a/datasets/chicago_crime/dataset.yaml b/datasets/chicago_crime/dataset.yaml
new file mode 100644
index 000000000..a40007b38
--- /dev/null
+++ b/datasets/chicago_crime/dataset.yaml
@@ -0,0 +1,44 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dataset:
+  name: chicago_crime
+  friendly_name: chicago_crime
+  description: |-
+    This dataset reflects reported incidents of crime (with the exception of murders where data exists for each victim) that occurred in the City of Chicago from 2001 to present, minus the most recent seven days. Data is extracted from the Chicago Police Department's CLEAR (Citizen Law Enforcement Analysis and Reporting) system. In order to protect the privacy of crime victims, addresses are shown at the block level only and specific locations are not identified. This data includes unverified reports supplied to the Police Department. The preliminary crime classifications may be changed at a later date based upon additional investigation and there is always the possibility of mechanical or human error. Therefore, the Chicago Police Department does not guarantee (either expressed or implied) the accuracy, completeness, timeliness, or correct sequencing of the information and the information should not be used for comparison purposes over time.
+
+    Dataset Source: City of Chicago
+
+    Category: Chicago, Public Safety
+
+    Use: This dataset is publicly available for anyone to use under the following terms provided by the Dataset Source —https://data.cityofchicago.org — and is provided "AS IS" without any warranty, express or implied, from Google. Google disclaims all liability for any damages, direct or indirect, resulting from the use of the dataset.
+
+    Update Frequency: Daily
+  dataset_sources: ~  # null: no value provided
+  terms_of_use: ~  # null: no value provided
+
+
+resources:
+  - type: bigquery_dataset
+    dataset_id: chicago_crime
+    description: |-  # same text as dataset.description above
+      This dataset reflects reported incidents of crime (with the exception of murders where data exists for each victim) that occurred in the City of Chicago from 2001 to present, minus the most recent seven days. Data is extracted from the Chicago Police Department's CLEAR (Citizen Law Enforcement Analysis and Reporting) system. In order to protect the privacy of crime victims, addresses are shown at the block level only and specific locations are not identified. This data includes unverified reports supplied to the Police Department. The preliminary crime classifications may be changed at a later date based upon additional investigation and there is always the possibility of mechanical or human error. Therefore, the Chicago Police Department does not guarantee (either expressed or implied) the accuracy, completeness, timeliness, or correct sequencing of the information and the information should not be used for comparison purposes over time.
+
+      Dataset Source: City of Chicago
+
+      Category: Chicago, Public Safety
+
+      Use: This dataset is publicly available for anyone to use under the following terms provided by the Dataset Source —https://data.cityofchicago.org — and is provided "AS IS" without any warranty, express or implied, from Google. Google disclaims all liability for any damages, direct or indirect, resulting from the use of the dataset.
+
+      Update Frequency: Daily