Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

140 validation metadata #165

Merged
merged 16 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
379 changes: 192 additions & 187 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/sbl_filing_api/entities/models/dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class SubmissionDAO(Base):
accepter: Mapped[AccepterDAO] = relationship("AccepterDAO", lazy="joined")
state: Mapped[SubmissionState] = mapped_column(SAEnum(SubmissionState))
validation_ruleset_version: Mapped[str] = mapped_column(nullable=True)
validation_json: Mapped[List[dict[str, Any]]] = mapped_column(JSON, nullable=True)
validation_json: Mapped[dict[str, Any]] = mapped_column(JSON, nullable=True)
submission_time: Mapped[datetime] = mapped_column(server_default=func.now())
filename: Mapped[str]

Expand Down
2 changes: 1 addition & 1 deletion src/sbl_filing_api/entities/models/dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class SubmissionDTO(BaseModel):
id: int | None = None
state: SubmissionState | None = None
validation_ruleset_version: str | None = None
validation_json: List[Dict[str, Any]] | None = None
validation_json: Dict[str, Any] | None = None
submission_time: datetime | None = None
filename: str
submitter: SubmitterDTO | None = None
Expand Down
30 changes: 28 additions & 2 deletions src/sbl_filing_api/services/submission_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from io import BytesIO
from fastapi import UploadFile
from regtech_data_validator.create_schemas import validate_phases
from regtech_data_validator.create_schemas import validate_phases, ValidationPhase
from regtech_data_validator.data_formatters import df_to_json, df_to_download
from regtech_data_validator.checks import Severity
import pandas as pd
Expand Down Expand Up @@ -83,7 +83,7 @@ async def validate_and_update_submission(period_code: str, lei: str, submission:
)
else:
submission.state = SubmissionState.VALIDATION_SUCCESSFUL
submission.validation_json = json.loads(df_to_json(result[1]))
submission.validation_json = build_validation_results(result)
submission_report = df_to_download(result[1])
await upload_to_storage(
period_code, lei, str(submission.id) + REPORT_QUALIFIER, submission_report.encode("utf-8")
Expand All @@ -95,3 +95,29 @@ async def validate_and_update_submission(period_code: str, lei: str, submission:
submission.state = SubmissionState.SUBMISSION_UPLOAD_MALFORMED
await update_submission(submission)
raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail=re)


def build_validation_results(result):
val_json = json.loads(df_to_json(result[1]))
val_res = {}

if result[2] == ValidationPhase.SYNTACTICAL.value:
val_res["syntax_errors"] = {"count": 0, "details": []}
val_res["syntax_errors"]["details"] = val_json
val_res["syntax_errors"]["count"] = len(val_res["syntax_errors"]["details"])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these can just be assigned directly instead of having an inititializing step.

Copy link
Contributor

@jcadam14 jcadam14 Apr 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just like below, this section can be shortened to just:
val_res = {"syntax_errors": {"count": len(val_json), "details": val_json}}

else:
val_res = {
"syntax_errors": {"count": 0, "details": []},
"logic_errors": {"count": 0, "details": []},
"logic_warnings": {"count": 0, "details": []},
}
for v in val_json:
if v["validation"]["severity"] == Severity.WARNING.value:
val_res["logic_warnings"]["details"].append(v)
elif v["validation"]["severity"] == Severity.ERROR.value:
val_res["logic_errors"]["details"].append(v)
lchen-2101 marked this conversation as resolved.
Show resolved Hide resolved
val_res["logic_warnings"]["count"], val_res["logic_errors"]["count"] = len(
val_res["logic_warnings"]["details"]
), len(val_res["logic_errors"]["details"])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using tuple destructuring doesn't add much value here... you save 1 keystroke of not having to type out an additional =; but the formatting makes it a little more complicated to read.


return val_res
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else block can be even further refined to:

        errors_list = [e for e in val_json if e["validation"]["severity"] == Severity.ERROR.value]
        warnings_list = [e for e in val_json if e["validation"]["severity"] == Severity.WARNING.value]
        val_res = {
            "syntax_errors": {"count": 0, "details": []},
            "logic_errors": {"count": len(errors_list), "details": errors_list},
            "logic_warnings": {"count": len(warnings_list), "details": warnings_list},
        }

to avoid looping and appending. List comprehension in python is faster on large data sets.

6 changes: 3 additions & 3 deletions tests/services/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ def warning_submission_mock(mocker: MockerFixture, validate_submission_mock: Moc


@pytest.fixture(scope="function")
def df_to_json_mock(mocker: MockerFixture, validate_submission_mock: Mock):
mock_json_formatting = mocker.patch("sbl_filing_api.services.submission_processor.df_to_json")
mock_json_formatting.return_value = "[{}]"
def build_validation_results_mock(mocker: MockerFixture, validate_submission_mock: Mock):
mock_json_formatting = mocker.patch("sbl_filing_api.services.submission_processor.build_validation_results")
mock_json_formatting.return_value = "{}"
return mock_json_formatting


Expand Down
18 changes: 15 additions & 3 deletions tests/services/test_submission_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ def test_file_not_supported_file_size_too_large(self, mock_upload_file: Mock):
assert e.value.status_code == HTTPStatus.REQUEST_ENTITY_TOO_LARGE

async def test_validate_and_update_successful(
self, mocker: MockerFixture, successful_submission_mock: Mock, df_to_json_mock: Mock, df_to_download_mock: Mock
self,
mocker: MockerFixture,
successful_submission_mock: Mock,
build_validation_results_mock: Mock,
df_to_download_mock: Mock,
):
mock_sub = SubmissionDAO(
id=1,
Expand All @@ -129,7 +133,11 @@ async def test_validate_and_update_successful(
assert successful_submission_mock.mock_calls[1].args[0].state == "VALIDATION_SUCCESSFUL"

async def test_validate_and_update_warnings(
self, mocker: MockerFixture, warning_submission_mock: Mock, df_to_json_mock: Mock, df_to_download_mock: Mock
self,
mocker: MockerFixture,
warning_submission_mock: Mock,
build_validation_results_mock: Mock,
df_to_download_mock: Mock,
):
mock_sub = SubmissionDAO(
id=1,
Expand All @@ -152,7 +160,11 @@ async def test_validate_and_update_warnings(
assert warning_submission_mock.mock_calls[1].args[0].state == "VALIDATION_WITH_WARNINGS"

async def test_validate_and_update_errors(
self, mocker: MockerFixture, error_submission_mock: Mock, df_to_json_mock: Mock, df_to_download_mock: Mock
self,
mocker: MockerFixture,
error_submission_mock: Mock,
build_validation_results_mock: Mock,
df_to_download_mock: Mock,
):
mock_sub = SubmissionDAO(
id=1,
Expand Down