Skip to content

Commit

Permalink
feat: add functions for managing batch data (#34, #37) (#35)
Browse files Browse the repository at this point in the history
Closes: #37
  • Loading branch information
holtgrewe committed Nov 18, 2022
1 parent c903546 commit 0c7e0f9
Show file tree
Hide file tree
Showing 23 changed files with 1,044 additions and 172 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,10 @@ ClinVar Submission via API Made Easy
- Free software: MIT license
- Documentation: https://clinvar-this.readthedocs.io/en/latest/


## Caveats

- **The `--use-testing` and `--dry-run` modes.**
When enabling `--use-testing`, an alternative API endpoint provided by ClinVar will be used.
This endpoint may use a different schema than the official endpoint (this happened, for example, in November 2022).
ClinVar has previously notified its submitters of such changes via email only, without an official news post.
91 changes: 81 additions & 10 deletions clinvar_api/client.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""REST API client code for communicating with server endpoints."""

import json
import typing

import attrs
import cattrs
from jsonschema import ValidationError
from logzero import logger
import requests

from clinvar_api import common, exceptions, models, msg
from clinvar_api import common, exceptions, models, msg, schemas

#: URL of the server endpoint (non-test/production).
ENDPOINT_URL_PROD = "https://submit.ncbi.nlm.nih.gov/api/v1/submissions/"
Expand Down Expand Up @@ -39,30 +42,67 @@ class Config:
#: Whether to use dry running.
use_dryrun: bool = False

#: Whether to validate submission payload before posting.
presubmission_validation: bool = True

def submit_data(payload: models.SubmissionContainer, config: Config) -> models.Created:

def submit_data(submission_container: models.SubmissionContainer, config: Config) -> models.Created:
    """Submit new data to ClinVar API.

    The container is converted to its message form, unstructured, stripped of ``None``-valued
    dict entries and -- unless ``config.presubmission_validation`` is off -- validated against
    the submission schema.  The cleaned payload is then wrapped into a single ``AddData``
    action and POSTed to the test or production endpoint (optionally in dry-run mode).

    Exceptions raised by ``schemas.validate_submission_payload`` are not caught here and
    propagate to the caller.

    :param submission_container: The submission data.
    :param config: The configuration to use.
    :return: The information about the created submission; on a ``204: No Content`` reply
        (dry-run) a placeholder ``models.Created`` with a fake ID is returned.
    :raises exceptions.SubmissionFailed: on problems with the submission.
    """
    # NOTE(review): ``config`` carries ``auth_token``; confirm that its repr does not leak
    # the token into the logs.
    logger.info("Submitting with config %s", config)

    url_prefix = ENDPOINT_URL_TEST if config.use_testing else ENDPOINT_URL_PROD
    url_suffix = SUFFIX_DRYRUN if config.use_dryrun else ""
    url = f"{url_prefix}{url_suffix}"
    logger.debug("Will submit to URL %s", url)
    headers = {
        "Content-type": "application/json",
        "SP-API-KEY": config.auth_token,
    }

    payload = cattrs.unstructure(submission_container.to_msg())
    logger.debug("Payload data is %s", json.dumps(payload, indent=2))
    cleaned_payload = common.clean_for_json(payload)
    logger.debug("Cleaned payload data is %s", json.dumps(cleaned_payload, indent=2))
    if config.presubmission_validation:
        logger.info("Validating payload...")
        schemas.validate_submission_payload(cleaned_payload)
        logger.info("... done validating payload")
    else:
        logger.info("Configured to NOT validate payload before submission")

    post_data = {
        "actions": [
            {"type": "AddData", "targetDb": "clinvar", "data": {"content": cleaned_payload}}
        ]
    }
    logger.debug("Overall POST payload is %s", post_data)

    response = requests.post(url, headers=headers, json=post_data)

    if response.ok:
        logger.info("API returned OK - %s: %s", response.status_code, response.reason)
        if response.status_code == 204:  # no content, on dry-run
            logger.info("Server returned '204: No Content', constructing fake created message.")
            return models.Created(id="--NONE--dry-run-result--")
        created_msg = common.CONVERTER.structure(response.json(), msg.Created)
        return models.Created.from_msg(created_msg)

    logger.warning("API returned an error - %s: %s", response.status_code, response.reason)
    try:
        # Decode the body once; it is needed for both logging and structuring.
        response_json = response.json()
    except ValueError as e:
        # An error reply without a JSON body (e.g. from a proxy) must still surface as a
        # submission failure rather than as a JSON decoding error.
        raise exceptions.SubmissionFailed(
            f"ClinVar submission failed: {response.status_code}: {response.reason}"
        ) from e
    # Log before structuring so the raw reply is available even if structuring fails.
    logger.debug("Full server response is %s", response_json)
    error_msg = common.CONVERTER.structure(response_json, msg.Error)
    error_obj = models.Error.from_msg(error_msg)
    # Only mention detailed errors when there are any; a plain ``hasattr`` check is true
    # for every instance of a class that declares the attribute, even when it is empty.
    errors = getattr(error_obj, "errors", None)
    if errors:
        raise exceptions.SubmissionFailed(
            f"ClinVar submission failed: {error_obj.message}, errors: {errors}"
        )
    raise exceptions.SubmissionFailed(f"ClinVar submission failed: {error_obj.message}")


@attrs.define(frozen=True)
Expand All @@ -75,10 +115,20 @@ class RetrieveStatusResult:
summaries: typing.Dict[str, models.SummaryResponse]


def _retrieve_status_summary(url: str) -> models.SummaryResponse:
def _retrieve_status_summary(
url: str, validate_response_json: bool = True
) -> models.SummaryResponse:
"""Retrieve status summary from the given URL."""
response = requests.get(url)
if response.ok:
response_json = response.json()
if validate_response_json:
logger.debug("Validating status summary response ...")
try:
schemas.validate_status_summary(response_json)
except ValidationError as e:
logger.warning("Response summary validation JSON is invalid: %s", e)
logger.debug("... done validating status summary response")
sr_msg = cattrs.structure(response.json(), msg.SummaryResponse)
return models.SummaryResponse.from_msg(sr_msg)
else:
Expand All @@ -90,7 +140,7 @@ def _retrieve_status_summary(url: str) -> models.SummaryResponse:
def retrieve_status(
submission_id: str,
config: Config,
):
) -> RetrieveStatusResult:
"""Retrieve submission status from API.
:param submission_id: The identifier of the submission as returned earlier from API.
Expand All @@ -102,20 +152,41 @@ def retrieve_status(
url_suffix = SUFFIX_DRYRUN if config.use_dryrun else ""
url = f"{url_prefix}{submission_id}/actions/{url_suffix}"
headers = {
"Content-type": "application/json",
"SP-API-KEY": config.auth_token,
}
logger.debug("Will query URL %s", url)
response = requests.get(url, headers=headers)
if response.ok:
logger.info("API returned OK - %s: %s", response.status_code, response.reason)
logger.debug("Structuring response ...")
status_msg = common.CONVERTER.structure(response.json(), msg.SubmissionStatus)
logger.debug(
"structured response is %s",
json.dumps(common.CONVERTER.unstructure(status_msg), indent=2),
)
logger.debug("... done structuring response")
status_obj = models.SubmissionStatus.from_msg(status_msg)
logger.info(
"Attempting to fetch %d status summary files...",
len(
[
None
for action in status_obj.actions
for action_response in action.responses
for _ in action_response.files
]
),
)
summaries = {}
for action in status_obj.actions:
for action_response in action.responses:
for file_ in action_response.files:
logger.info(" - fetching %s", file_.url)
summaries[file_.url] = _retrieve_status_summary(file_.url)
logger.info("... done fetching status summary files")
return RetrieveStatusResult(status=status_obj, summaries=summaries)
else:
logger.info("API returned an error %s: %s", response.status_code, response.reason)
response_json = response.json()
raise exceptions.QueryFailed(f"ClinVar query failed: {response_json}")

Expand Down
15 changes: 15 additions & 0 deletions clinvar_api/common.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import typing
import uuid

import cattr
Expand All @@ -23,3 +24,17 @@ def _setup_converter() -> cattr.Converter:

#: cattr Converter to use
CONVERTER = _setup_converter()


def clean_for_json(
    value: typing.Union[
        bool, int, float, str, typing.List[typing.Any], None, typing.Dict[str, typing.Any]
    ]
) -> typing.Union[
    bool, int, float, str, typing.List[typing.Any], None, typing.Dict[str, typing.Any]
]:
    """Clean the given value for JSON submission.

    Recursively walks dicts and lists and drops every dict entry whose value is ``None``.
    ``None`` elements of lists are kept (removing them would shift positions), and any
    value that is neither a dict nor a list is returned unchanged.

    :param value: The JSON-like value to clean.
    :return: A cleaned copy for dicts and lists; the value itself otherwise.
    """
    if isinstance(value, dict):
        return {k: clean_for_json(v) for k, v in value.items() if v is not None}
    if isinstance(value, list):
        return [clean_for_json(elem) for elem in value]
    return value
2 changes: 1 addition & 1 deletion clinvar_api/models/sub_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def from_msg(cls, other: msg.Error):

@attrs.define(frozen=True)
class SummaryResponseErrorInput:
value: str
value: typing.Optional[str] = None
field: typing.Optional[str] = None

@classmethod
Expand Down
1 change: 1 addition & 0 deletions clinvar_api/msg/query_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


class ErrorCode(Enum):
    """Enumeration of error codes, stored as the strings ``"0"``, ``"1"`` and ``"2"``.

    NOTE(review): presumably these are the codes ClinVar reports in query responses --
    confirm the exact meaning of each value against the API documentation.
    """

    SUCCESS = "0"
    PARTIAL_SUCCESS = "1"
    ALL_FAILURE = "2"

Expand Down
2 changes: 1 addition & 1 deletion clinvar_api/msg/sub_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class Error:

@attrs.define(frozen=True)
class SummaryResponseErrorInput:
    """One erroneous input reported in a summary response.

    Both attributes are optional and default to ``None``.
    """

    #: The erroneous value, if given.
    value: typing.Optional[str] = None
    #: The name of the erroneous field, if given.
    field: typing.Optional[str] = None


Expand Down
23 changes: 23 additions & 0 deletions clinvar_api/schemas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Helpers for schema validation."""

import functools
import json
import pathlib
import typing

from jsonschema import validate


@functools.lru_cache(maxsize=None)
def _load_schema(file_name: str) -> typing.Any:
    """Load the JSON schema ``file_name`` stored next to this module, parsing it only once.

    The parsed schema is cached and shared between calls, so callers must not modify it.
    """
    schema_path = pathlib.Path(__file__).parent / file_name
    # Read as UTF-8 explicitly instead of relying on the locale's default encoding.
    with schema_path.open("rt", encoding="utf-8") as inputf:
        return json.load(inputf)


def validate_submission_payload(payload: typing.Any) -> None:
    """Validate ``payload`` against the schema in ``submission_schema.json``.

    :param payload: The (JSON-like) submission payload to validate.
    :raises jsonschema.ValidationError: if the payload does not conform to the schema.
    """
    validate(instance=payload, schema=_load_schema("submission_schema.json"))


def validate_status_summary(payload: typing.Any) -> None:
    """Validate ``payload`` against the schema in ``summary_response_schema.json``.

    :param payload: The (JSON-like) status summary response to validate.
    :raises jsonschema.ValidationError: if the payload does not conform to the schema.
    """
    validate(instance=payload, schema=_load_schema("summary_response_schema.json"))
3 changes: 0 additions & 3 deletions clinvar_api/schemas/summary_response_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,6 @@
"description": "The field names and values that are erroneous",
"items": {
"type": "object",
"required": [
"value"
],
"properties": {
"field": {
"type": "string",
Expand Down

0 comments on commit 0c7e0f9

Please sign in to comment.