From 2424d8855f8694b66ff0c64cbae8b0ad97e100e8 Mon Sep 17 00:00:00 2001 From: Chiara Rasi Date: Mon, 2 May 2022 08:17:38 +0200 Subject: [PATCH] Script to fix OMIM format for old cases (#3304) * Script to fix OMIM format for old cases * Added the --fix option * Removed unused imports * Same script as in test Co-authored-by: Daniel Nilsson --- CHANGELOG.md | 1 + scout/models/case/case.py | 10 +---- scripts/convert_case_omim_format.py | 69 +++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 scripts/convert_case_omim_format.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 24dab3a674..df716622b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/) - Rank score results now show the ranking range - cDNA and protein changes displayed on institute causatives pages - Optional SESSION_TIMEOUT_MINUTES configuration in app config files +- Script to convert old OMIM case format (list of integers) to new format (list of dictionaries) ### Changed - Verify user before redirecting to IGV alignments and sashimi plots - Build case IGV tracks starting from case and variant objects instead of passing all params in a form diff --git a/scout/models/case/case.py b/scout/models/case/case.py index 78097b86e5..a5b23890e4 100644 --- a/scout/models/case/case.py +++ b/scout/models/case/case.py @@ -1,16 +1,8 @@ from __future__ import absolute_import import logging -import os from datetime import datetime -from scout.constants import ANALYSIS_TYPES -from scout.models import PhenotypeTerm -from scout.models.panel import GenePanel - -from . 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Convert case diagnoses from the old OMIM format to the new one.

Old format: ``diagnosis_phenotypes`` is a list of integers (OMIM numbers).
New format: ``diagnosis_phenotypes`` is a list of dictionaries with the keys
``disease_nr``, ``disease_id`` and ``description``.
"""

import click
from pymongo import MongoClient
from pymongo.errors import PyMongoError

CASES_WITH_DIA = {
    "diagnosis_phenotypes": {"$exists": True, "$ne": []}
}  # MongoDB query to locate cases with any diagnosis

SELECT_FIELDS = {
    "owner": 1,
    "display_name": 1,
    "diagnosis_phenotypes": 1,
}  # select only a few important fields using the query above


def _convert_diagnoses(db, old_dia, term_cache):
    """Return a copy of ``old_dia`` with integer OMIM numbers expanded to dictionaries.

    Args:
        db: pymongo database holding the ``disease_term`` collection.
        old_dia: the ``diagnosis_phenotypes`` list of one case.
        term_cache: dict shared between calls, mapping an OMIM number to the
            matching ``disease_term`` document (or None when there is none), so
            every number is queried only once per run.

    Returns:
        list: entries that are not integers are kept as they are; every integer
        with a matching disease term becomes a dictionary with the keys
        ``disease_nr``, ``disease_id`` and ``description``.
    """
    new_dia = []
    for dia in old_dia:
        if not isinstance(dia, int):
            # Not an old-style OMIM number (e.g. already a dictionary): keep it
            new_dia.append(dia)
            continue

        if dia not in term_cache:
            term_cache[dia] = db.disease_term.find_one({"disease_nr": dia})
        disease_term = term_cache[dia]

        if disease_term is None:
            # NOTE(review): a number without a disease term is left out of the
            # converted list, so with --fix it is removed from the case.
            click.echo(f"Could not find a disease term with id:{dia}")
            continue

        new_dia.append(
            {
                "disease_nr": dia,
                "disease_id": disease_term["disease_id"],
                "description": disease_term["description"],
            }
        )
    return new_dia


@click.command()
@click.option("--db-uri", required=True, help="mongodb://user:password@db_url:db_port")
@click.option("--db-name", required=True, help="db name")
@click.option("--fix", help="Use this flag to fix the OMIM format in old cases", is_flag=True)
def omim_case_fix_format(db_uri, db_name, fix):
    """List cases with old-format OMIM diagnoses and, with --fix, convert them.

    Without --fix nothing is written to the database: the affected cases and
    any OMIM numbers lacking a disease term are only reported.

    Raises:
        click.ClickException: when a database operation fails, so that the
            command exits with a non-zero status.
    """
    try:
        # The context manager closes the client even when a query fails
        with MongoClient(db_uri) as client:
            db = client[db_name]
            # MongoClient connects lazily: this only prints the settings, the
            # first query below is what actually reaches the server
            click.echo(f"database connection info:{db}")

            cases_with_dia = list(db.case.find(CASES_WITH_DIA, SELECT_FIELDS))
            click.echo(f"Total number of cases with diagnosis:{len(cases_with_dia)}")

            # Old format = at least one diagnosis stored as a bare integer.
            # All entries are checked (not just the first one) and a null
            # value is treated as "no diagnoses".
            cases_with_old_dia = [
                case
                for case in cases_with_dia
                if any(isinstance(dia, int) for dia in case.get("diagnosis_phenotypes") or [])
            ]
            click.echo(f"Total number of cases with old diagnosis format:{len(cases_with_old_dia)}")

            term_cache = {}  # OMIM number -> disease_term document (or None)
            for i, case in enumerate(cases_with_old_dia):
                click.echo(f"n:{i}\t{case['owner']}\t{case['display_name']}")
                old_dia = case["diagnosis_phenotypes"]
                # Run the lookups in dry-run mode too, so that missing disease
                # terms are reported before anything is modified
                new_dia = _convert_diagnoses(db, old_dia, term_cache)

                if fix:
                    db.case.find_one_and_update(
                        {"_id": case["_id"]}, {"$set": {"diagnosis_phenotypes": new_dia}}
                    )
                else:
                    new_dia = old_dia
                click.echo(f"old dia:{old_dia}--->new dia:{new_dia}\n")

    except PyMongoError as err:
        # Report database problems through click: message on stderr, exit code 1
        raise click.ClickException(str(err)) from err


if __name__ == "__main__":
    omim_case_fix_format()