Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add downstream referencing views to raw data gitbook documentation (Recidiviz/recidiviz-data#29334)

## Description of the change

- Update the gitbook state raw data page to rename the “Referencing Views” column to “Referencing Ingest Views” and add a second column, “Referencing Downstream Views”, which is hydrated with the contents of raw_data_reference_reasons.yaml to list any downstream views that reference each raw data table.
- Refactor raw_data_reference_reasons.yaml parsing to a separate class

## Type of change

> All pull requests must have at least one of the following labels applied (otherwise the PR will fail):

| Label | Description |
|---|---|
| Type: Bug | non-breaking change that fixes an issue |
| Type: Feature | non-breaking change that adds functionality |
| Type: Breaking Change | fix or feature that would cause existing functionality to not work as expected |
| Type: Non-breaking refactor | change addresses some tech debt item or prepares for a later change, but does not change functionality |
| Type: Configuration Change | adjusts configuration to achieve some end related to functionality, development, performance, or security |
| Type: Dependency Upgrade | upgrades a project dependency - these changes are not included in release notes |

## Related issues

Closes Recidiviz/recidiviz-data#29124

## Checklists

### Development

**This box MUST be checked by the submitter prior to merging**:
- [x] **Double- and triple-checked that there is no Personally Identifiable Information (PII) being mistakenly added in this pull request**

These boxes should be checked by the submitter prior to merging:
- [x] Tests have been written to cover the code changed/added as part of this pull request

### Code review

These boxes should be checked by reviewers prior to merging:
- [x] This pull request has a descriptive title and information useful to a reviewer
- [x] Potential security implications or infrastructural changes have been considered, if relevant

GitOrigin-RevId: 3377905846046554656398d68823f23135dcc2fe
- Loading branch information
1 parent
ca05752
commit b838304
Showing 5 changed files with 276 additions and 36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
121 changes: 121 additions & 0 deletions
121
recidiviz/tests/tools/raw_data_reference_reasons_yaml_loader_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
# Recidiviz - a data platform for criminal justice reform | ||
# Copyright (C) 2023 Recidiviz, Inc. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
# ============================================================================= | ||
"""Tests for raw_data_reference_reasons_yaml_loader.py.""" | ||
import unittest | ||
from collections import defaultdict | ||
from unittest.mock import mock_open, patch | ||
|
||
import yaml | ||
from mock import MagicMock | ||
|
||
from recidiviz.big_query.big_query_address import BigQueryAddress | ||
from recidiviz.common.constants.states import StateCode | ||
from recidiviz.tools.raw_data_reference_reasons_yaml_loader import ( | ||
RawDataReferenceReasonsYamlLoader, | ||
) | ||
|
||
# Well-formed YAML fixture served through the patched `open` in the tests.
# Nesting is: state code -> raw file tag -> referencing view address -> reason,
# matching the parsed form spelled out in `mock_raw_data`.
mock_yaml_content = """
US_XX:
  table1:
    dataset1.table1: |-
      Usage reason unknown.
    dataset2.table2: |-
      Usage reason unknown.
US_YY:
  table2:
    dataset3.table3: |-
      Usage reason unknown.
"""
# YAML fixture whose top-level key is `US_NOT_REAL`. test_parse_yaml_failure
# expects get_yaml_data() to raise RuntimeError for it -- presumably because
# the key is not a valid StateCode (confirm against the loader).
mock_yaml_invalid_content = """
US_NOT_REAL:
  table1:
    dataset1.table1: |-
      Usage reason unknown.
"""
# Expected result of get_raw_yaml_data() for `mock_yaml_content`: the YAML
# parsed into plain nested dicts with string keys and string reasons.
mock_raw_data = {
    "US_XX": {
        "table1": {
            "dataset1.table1": "Usage reason unknown.",
            "dataset2.table2": "Usage reason unknown.",
        }
    },
    "US_YY": {"table2": {"dataset3.table3": "Usage reason unknown."}},
}
# Expected result of get_yaml_data() for `mock_yaml_content`: state-code
# strings become StateCode members, and each file tag maps to the set of
# BigQueryAddress values built from the referencing view strings (the
# free-text reasons are not part of this form).
mock_converted_data = {
    StateCode.US_XX: {
        "table1": {
            BigQueryAddress.from_str("dataset1.table1"),
            BigQueryAddress.from_str("dataset2.table2"),
        }
    },
    StateCode.US_YY: {"table2": {BigQueryAddress.from_str("dataset3.table3")}},
}
|
||
|
||
class TestRawDataReferenceReasonsYamlLoader(unittest.TestCase):
    """Test raw data reference reasons yaml loader."""

    def setUp(self) -> None:
        # Reset the loader before every test -- presumably this clears any
        # cached YAML so each test reads through its own patched `open`.
        RawDataReferenceReasonsYamlLoader.reset_data()

    # Decorators apply bottom-up, so `_1` is the yaml.safe_load mock and `_2`
    # is the builtins.open mock.
    @patch("builtins.open", new_callable=mock_open, read_data=mock_yaml_content)
    @patch("yaml.safe_load", side_effect=yaml.YAMLError("error parsing YAML"))
    def test_load_yaml_failure(self, _1: MagicMock, _2: MagicMock) -> None:
        """Both accessors raise RuntimeError when yaml.safe_load fails."""
        with self.assertRaises(RuntimeError):
            RawDataReferenceReasonsYamlLoader.get_yaml_data()
        with self.assertRaises(RuntimeError):
            RawDataReferenceReasonsYamlLoader.get_raw_yaml_data()

    @patch("builtins.open", new_callable=mock_open, read_data=mock_yaml_invalid_content)
    def test_parse_yaml_failure(self, _: MagicMock) -> None:
        """get_yaml_data raises RuntimeError for the US_NOT_REAL fixture."""
        with self.assertRaises(RuntimeError):
            RawDataReferenceReasonsYamlLoader.get_yaml_data()

    @patch("builtins.open", new_callable=mock_open, read_data=mock_yaml_content)
    def test_load_yaml(self, _: MagicMock) -> None:
        """Valid YAML yields the expected converted and raw forms."""
        self.assertEqual(
            RawDataReferenceReasonsYamlLoader.get_yaml_data(), mock_converted_data
        )
        self.assertEqual(
            RawDataReferenceReasonsYamlLoader.get_raw_yaml_data(), mock_raw_data
        )

    @patch("builtins.open", new_callable=mock_open, read_data=mock_yaml_content)
    def test_get_downstream_referencing_views(self, _: MagicMock) -> None:
        """A state present in the YAML returns its file tag -> addresses map."""
        result = RawDataReferenceReasonsYamlLoader.get_downstream_referencing_views(
            StateCode.US_XX
        )
        self.assertEqual(
            result,
            {
                "table1": {
                    BigQueryAddress.from_str("dataset1.table1"),
                    BigQueryAddress.from_str("dataset2.table2"),
                }
            },
        )

    @patch("builtins.open", new_callable=mock_open, read_data=mock_yaml_content)
    def test_get_downstream_referencing_views_invalid_state(self, _: MagicMock) -> None:
        """A state absent from the YAML returns an empty defaultdict(set)."""
        result = RawDataReferenceReasonsYamlLoader.get_downstream_referencing_views(
            StateCode.US_WW
        )
        self.assertEqual(
            result,
            defaultdict(set),
        )
        # Looking up an unknown file tag must yield an empty set, not KeyError.
        self.assertEqual(result["non_existent_file_tag"], set())
Oops, something went wrong.