Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

140 validation metadata #165

Merged
merged 16 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 5 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/sbl_filing_api/entities/models/dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class SubmissionDAO(Base):
accepter: Mapped[UserActionDAO] = relationship(lazy="selectin", foreign_keys=[accepter_id])
state: Mapped[SubmissionState] = mapped_column(SAEnum(SubmissionState))
validation_ruleset_version: Mapped[str] = mapped_column(nullable=True)
validation_json: Mapped[List[dict[str, Any]]] = mapped_column(JSON, nullable=True)
validation_json: Mapped[dict[str, Any]] = mapped_column(JSON, nullable=True)
submission_time: Mapped[datetime] = mapped_column(server_default=func.now())
filename: Mapped[str]

Expand Down
2 changes: 1 addition & 1 deletion src/sbl_filing_api/entities/models/dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class SubmissionDTO(BaseModel):
id: int | None = None
state: SubmissionState | None = None
validation_ruleset_version: str | None = None
validation_json: List[Dict[str, Any]] | None = None
validation_json: Dict[str, Any] | None = None
submission_time: datetime | None = None
filename: str
submitter: UserActionDTO
Expand Down
24 changes: 21 additions & 3 deletions src/sbl_filing_api/services/submission_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from io import BytesIO
from fastapi import UploadFile
from regtech_data_validator.create_schemas import validate_phases
from regtech_data_validator.create_schemas import validate_phases, ValidationPhase
from regtech_data_validator.data_formatters import df_to_json, df_to_download
from regtech_data_validator.checks import Severity
import pandas as pd
Expand Down Expand Up @@ -105,7 +105,7 @@ async def validate_and_update_submission(period_code: str, lei: str, submission:
)
else:
submission.state = SubmissionState.VALIDATION_SUCCESSFUL
submission.validation_json = json.loads(df_to_json(result[1]))
submission.validation_json = build_validation_results(result)
submission_report = df_to_download(result[1])
await upload_to_storage(
period_code, lei, str(submission.id) + REPORT_QUALIFIER, submission_report.encode("utf-8")
Expand All @@ -116,7 +116,6 @@ async def validate_and_update_submission(period_code: str, lei: str, submission:
log.error("The file is malformed", re, exc_info=True, stack_info=True)
submission.state = SubmissionState.SUBMISSION_UPLOAD_MALFORMED
await update_submission(submission)

except Exception as e:
log.error(
f"Validation for submission {submission.id} did not complete due to an unexpected error.",
Expand All @@ -126,3 +125,22 @@ async def validate_and_update_submission(period_code: str, lei: str, submission:
)
submission.state = SubmissionState.VALIDATION_ERROR
await update_submission(submission)


def build_validation_results(result):
    """Group validator findings into the structure stored as ``validation_json``.

    Args:
        result: Indexable validation outcome. ``result[1]`` is the findings
            DataFrame handed to ``df_to_json`` and ``result[2]`` is the phase
            value compared against ``ValidationPhase.SYNTACTICAL.value``.
            ``result[0]`` is not read here.

    Returns:
        dict: For a syntactical-phase result, only a ``"syntax_errors"``
        entry holding every finding. Otherwise ``"syntax_errors"`` is empty
        and the findings are split into ``"logic_errors"`` and
        ``"logic_warnings"`` by severity. Each entry has the form
        ``{"count": int, "details": list}``.
    """
    val_json = json.loads(df_to_json(result[1]))

    if result[2] == ValidationPhase.SYNTACTICAL.value:
        # Review feedback (jcadam14, Apr 29, 2024): this section can be
        # shortened to a single literal instead of creating a placeholder
        # dict and then overwriting "details" and "count".
        return {"syntax_errors": {"count": len(val_json), "details": val_json}}

    # Findings whose severity is neither ERROR nor WARNING land in no bucket.
    errors_list = [e for e in val_json if e["validation"]["severity"] == Severity.ERROR.value]
    warnings_list = [w for w in val_json if w["validation"]["severity"] == Severity.WARNING.value]
    return {
        "syntax_errors": {"count": 0, "details": []},
        "logic_errors": {"count": len(errors_list), "details": errors_list},
        "logic_warnings": {"count": len(warnings_list), "details": warnings_list},
    }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else block can be even further refined to:

        errors_list = [e for e in val_json if e["validation"]["severity"] == Severity.ERROR.value]
        warnings_list = [e for e in val_json if e["validation"]["severity"] == Severity.WARNING.value]
        val_res = {
            "syntax_errors": {"count": 0, "details": []},
            "logic_errors": {"count": len(errors_list), "details": errors_list},
            "logic_warnings": {"count": len(warnings_list), "details": warnings_list},
        }

to avoid looping and appending. List comprehensions in Python are faster on large data sets.

6 changes: 3 additions & 3 deletions tests/services/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ def warning_submission_mock(mocker: MockerFixture, validate_submission_mock: Moc


@pytest.fixture(scope="function")
def df_to_json_mock(mocker: MockerFixture, validate_submission_mock: Mock):
mock_json_formatting = mocker.patch("sbl_filing_api.services.submission_processor.df_to_json")
mock_json_formatting.return_value = "[{}]"
def build_validation_results_mock(mocker: MockerFixture, validate_submission_mock: Mock):
    """Patch ``build_validation_results`` in the submission processor module.

    Returns the patch mock so tests can inspect how it was called.
    """
    mock_build_results = mocker.patch("sbl_filing_api.services.submission_processor.build_validation_results")
    # The patched function produces a dict (not a JSON string), so the stub
    # returns an empty dict to match the value stored in validation_json.
    mock_build_results.return_value = {}
    return mock_build_results


Expand Down
200 changes: 197 additions & 3 deletions tests/services/test_submission_processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import asyncio

import pandas as pd
import pytest

from http import HTTPStatus
Expand All @@ -8,6 +10,8 @@
from pytest_mock import MockerFixture
from sbl_filing_api.config import FsProtocol, settings
from sbl_filing_api.entities.models.dao import SubmissionDAO, SubmissionState
from regtech_data_validator.create_schemas import ValidationPhase
from regtech_data_validator.checks import Severity
from regtech_api_commons.api.exceptions import RegTechHttpException


Expand Down Expand Up @@ -104,7 +108,11 @@ def test_file_not_supported_file_size_too_large(self, mock_upload_file: Mock):
assert e.value.status_code == HTTPStatus.REQUEST_ENTITY_TOO_LARGE

async def test_validate_and_update_successful(
self, mocker: MockerFixture, successful_submission_mock: Mock, df_to_json_mock: Mock, df_to_download_mock: Mock
self,
mocker: MockerFixture,
successful_submission_mock: Mock,
build_validation_results_mock: Mock,
df_to_download_mock: Mock,
):
mock_sub = SubmissionDAO(
id=1,
Expand All @@ -128,7 +136,11 @@ async def test_validate_and_update_successful(
assert successful_submission_mock.mock_calls[1].args[0].state == "VALIDATION_SUCCESSFUL"

async def test_validate_and_update_warnings(
self, mocker: MockerFixture, warning_submission_mock: Mock, df_to_json_mock: Mock, df_to_download_mock: Mock
self,
mocker: MockerFixture,
warning_submission_mock: Mock,
build_validation_results_mock: Mock,
df_to_download_mock: Mock,
):
mock_sub = SubmissionDAO(
id=1,
Expand All @@ -151,7 +163,11 @@ async def test_validate_and_update_warnings(
assert warning_submission_mock.mock_calls[1].args[0].state == "VALIDATION_WITH_WARNINGS"

async def test_validate_and_update_errors(
self, mocker: MockerFixture, error_submission_mock: Mock, df_to_json_mock: Mock, df_to_download_mock: Mock
self,
mocker: MockerFixture,
error_submission_mock: Mock,
build_validation_results_mock: Mock,
df_to_download_mock: Mock,
):
mock_sub = SubmissionDAO(
id=1,
Expand Down Expand Up @@ -288,3 +304,181 @@ async def mock_validate_and_update_submission(
exc_info=True,
stack_info=True,
)

async def test_build_validation_results_success(self):
    """A logical-phase result with no findings reports zero in every bucket."""
    # Pass an empty DataFrame *instance*. The previous version passed the
    # ``pd.DataFrame`` class object itself, which is not a findings frame.
    # NOTE(review): assumes df_to_json yields an empty JSON list for an empty
    # frame -- confirm against regtech_data_validator.
    result = (True, pd.DataFrame(), ValidationPhase.LOGICAL.value)
    validation_json = submission_processor.build_validation_results(result)
    assert validation_json["syntax_errors"]["count"] == 0
    assert validation_json["logic_errors"]["count"] == 0
    assert validation_json["logic_warnings"]["count"] == 0

async def test_build_validation_results_syntax_errors(self):
    """A syntactical-phase finding is counted under ``syntax_errors``."""
    finding_columns = [
        "record_no",
        "validation_phase",
        "uid",
        "field_name",
        "field_value",
        "validation_severity",
        "fig_link",
        "validation_id",
        "validation_name",
        "validation_desc",
        "scope",
    ]
    # Single ERROR-severity finding raised during the syntactical phase.
    finding_rows = [
        [
            1,
            ValidationPhase.SYNTACTICAL.value,
            "TESTLEI1234567890123",
            "field_in_error",
            1,
            Severity.ERROR.value,
            "test_link",
            "VALID123",
            "validation_name_goes_here",
            "this is a val desc",
            "multi-field",
        ],
    ]
    findings_df = pd.DataFrame(finding_rows, columns=finding_columns)

    outcome = submission_processor.build_validation_results(
        (False, findings_df, ValidationPhase.SYNTACTICAL.value)
    )

    assert outcome["syntax_errors"]["count"] > 0

async def test_build_validation_results_logic_warnings(self):
    """A logical-phase WARNING finding is counted only under ``logic_warnings``."""
    finding_columns = [
        "record_no",
        "validation_phase",
        "uid",
        "field_name",
        "field_value",
        "validation_severity",
        "fig_link",
        "validation_id",
        "validation_name",
        "validation_desc",
        "scope",
    ]
    # Single WARNING-severity finding raised during the logical phase.
    finding_rows = [
        [
            1,
            ValidationPhase.LOGICAL.value,
            "TESTLEI1234567890123",
            "field_in_error",
            1,
            Severity.WARNING.value,
            "test_link",
            "VALID123",
            "validation_name_goes_here",
            "this is a val desc",
            "multi-field",
        ],
    ]
    findings_df = pd.DataFrame(finding_rows, columns=finding_columns)

    outcome = submission_processor.build_validation_results(
        (False, findings_df, ValidationPhase.LOGICAL.value)
    )

    assert outcome["syntax_errors"]["count"] == 0
    assert outcome["logic_errors"]["count"] == 0
    assert outcome["logic_warnings"]["count"] > 0

async def test_build_validation_results_logic_errors(self):
    """A logical-phase ERROR finding is counted only under ``logic_errors``."""
    finding_columns = [
        "record_no",
        "validation_phase",
        "uid",
        "field_name",
        "field_value",
        "validation_severity",
        "fig_link",
        "validation_id",
        "validation_name",
        "validation_desc",
        "scope",
    ]
    # Single ERROR-severity finding raised during the logical phase.
    finding_rows = [
        [
            1,
            ValidationPhase.LOGICAL.value,
            "TESTLEI1234567890123",
            "field_in_error",
            1,
            Severity.ERROR.value,
            "test_link",
            "VALID123",
            "validation_name_goes_here",
            "this is a val desc",
            "multi-field",
        ],
    ]
    findings_df = pd.DataFrame(finding_rows, columns=finding_columns)

    outcome = submission_processor.build_validation_results(
        (False, findings_df, ValidationPhase.LOGICAL.value)
    )

    assert outcome["syntax_errors"]["count"] == 0
    assert outcome["logic_errors"]["count"] > 0
    assert outcome["logic_warnings"]["count"] == 0

async def test_build_validation_results_logic_warnings_and_errors(self):
    """Mixed logical-phase findings populate both ``logic_*`` buckets."""
    finding_columns = [
        "record_no",
        "validation_phase",
        "uid",
        "field_name",
        "field_value",
        "validation_severity",
        "fig_link",
        "validation_id",
        "validation_name",
        "validation_desc",
        "scope",
    ]
    # One WARNING finding and one ERROR finding, both from the logical phase.
    warning_row = [
        1,
        ValidationPhase.LOGICAL.value,
        "TESTLEI1234567890123",
        "field_in_error",
        1,
        Severity.WARNING.value,
        "test_link",
        "VALID123",
        "validation_name_goes_here",
        "this is a val desc",
        "multi-field",
    ]
    error_row = [
        2,
        ValidationPhase.LOGICAL.value,
        "TESTLEI1234567890123",
        "field_in_error",
        1,
        Severity.ERROR.value,
        "test_link",
        "VALID234",
        "validation_name_goes_here",
        "this is a val desc",
        "multi-field",
    ]
    findings_df = pd.DataFrame([warning_row, error_row], columns=finding_columns)

    outcome = submission_processor.build_validation_results(
        (False, findings_df, ValidationPhase.LOGICAL.value)
    )

    assert outcome["syntax_errors"]["count"] == 0
    assert outcome["logic_errors"]["count"] > 0
    assert outcome["logic_warnings"]["count"] > 0