Skip to content

Commit

Permalink
Merge pull request #134 from oncokb/split-citations
Browse files Browse the repository at this point in the history
Split citations column to multiple based on data fields
  • Loading branch information
zhx828 committed Nov 12, 2021
2 parents 0a2751f + 9fd9ce1 commit 6c06049
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 51 deletions.
34 changes: 24 additions & 10 deletions AnnotatorCore.py
Expand Up @@ -367,31 +367,34 @@ def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerTy
outf.write("\t" + VARIANT_IN_ONCOKB_HEADER)

outf.write("\tMUTATION_EFFECT")
outf.write("\tMUTATION_EFFECT_CITATIONS")
outf.write("\tONCOGENIC")

newncols += 4
newncols += 5

for l in levels:
outf.write('\t' + l)
newncols += len(levels)

outf.write("\tHIGHEST_LEVEL")
outf.write("\tCITATIONS")
outf.write("\tTX_CITATIONS")
newncols += 2

for l in dxLevels:
outf.write('\t' + l)
newncols += len(dxLevels)

outf.write("\tHIGHEST_DX_LEVEL")
newncols += 1
outf.write("\tDX_CITATIONS")
newncols += 2

for l in pxLevels:
outf.write('\t' + l)
newncols += len(pxLevels)

outf.write("\tHIGHEST_PX_LEVEL")
newncols += 1
outf.write("\tPX_CITATIONS")
newncols += 2

outf.write("\n")

Expand Down Expand Up @@ -1417,8 +1420,11 @@ def gettumortypename(tumortype):
return tumortype['mainType']['name']


def getimplications(oncokbdata, levels, implications):
def getimplications(oncokbdata, implication_type, levels, implications):
citation_column_key = implication_type + '_citations'
for implication in implications:
oncokbdata[citation_column_key] = appendoncokbcitations(oncokbdata[citation_column_key], implication['pmids'],
implication['abstracts'])
level = implication['levelOfEvidence']

if level is not None:
Expand Down Expand Up @@ -1629,8 +1635,12 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
oncokbdata[GENE_IN_ONCOKB_HEADER] = GENE_IN_ONCOKB_DEFAULT
oncokbdata[VARIANT_IN_ONCOKB_HEADER] = VARIANT_IN_ONCOKB_DEFAULT
oncokbdata['mutation_effect'] = ""
oncokbdata['mutation_effect_citations'] = []
oncokbdata['citations'] = []
oncokbdata['oncogenic'] = ""
oncokbdata['tx_citations'] = []
oncokbdata['dx_citations'] = []
oncokbdata['px_citations'] = []

try:
# oncogenic
Expand All @@ -1646,7 +1656,7 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
# mutation effect
if (annotation['mutationEffect'] is not None):
oncokbdata['mutation_effect'] = annotation['mutationEffect']['knownEffect']
oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'],
oncokbdata['mutation_effect_citations'] = appendoncokbcitations(oncokbdata['mutation_effect_citations'],
annotation['mutationEffect']['citations']['pmids'],
annotation['mutationEffect']['citations']['abstracts'])

Expand All @@ -1663,7 +1673,7 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
else:
drugs = treatment['drugs']

oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'], treatment['pmids'],
oncokbdata['tx_citations'] = appendoncokbcitations(oncokbdata['tx_citations'], treatment['pmids'],
treatment['abstracts'])

if len(drugs) == 0:
Expand All @@ -1676,10 +1686,10 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
if treatmentname not in oncokbdata[level]:
oncokbdata[level].append('+'.join(drugnames))
if annotation['diagnosticImplications'] is not None:
getimplications(oncokbdata, dxLevels, annotation['diagnosticImplications'])
getimplications(oncokbdata, 'dx', dxLevels, annotation['diagnosticImplications'])

if annotation['prognosticImplications'] is not None:
getimplications(oncokbdata, pxLevels, annotation['prognosticImplications'])
getimplications(oncokbdata, 'px', pxLevels, annotation['prognosticImplications'])

oncokbdata['highestDiagnosticImplicationLevel'] = annotation['highestDiagnosticImplicationLevel']
oncokbdata['highestPrognosticImplicationLevel'] = annotation['highestPrognosticImplicationLevel']
Expand All @@ -1701,18 +1711,22 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
ret.append(oncokbdata[GENE_IN_ONCOKB_HEADER])
ret.append(oncokbdata[VARIANT_IN_ONCOKB_HEADER])
ret.append(oncokbdata['mutation_effect'])
ret.append(';'.join(oncokbdata['mutation_effect_citations']))
ret.append(oncokbdata['oncogenic'])
for l in levels:
ret.append(','.join(oncokbdata[l]))
ret.append(gethighestsensitivitylevel(oncokbdata))
ret.append(';'.join(oncokbdata['citations']))
ret.append(';'.join(oncokbdata['tx_citations']))

for l in dxLevels:
ret.append(','.join(oncokbdata[l]))
ret.append(gethighestDxPxlevel(dxLevels, [oncokbdata['highestDiagnosticImplicationLevel']]))
ret.append(';'.join(oncokbdata['dx_citations']))

for l in pxLevels:
ret.append(','.join(oncokbdata[l]))
ret.append(gethighestDxPxlevel(pxLevels, [oncokbdata['highestPrognosticImplicationLevel']]))
ret.append(';'.join(oncokbdata['px_citations']))

return ret

Expand Down
29 changes: 16 additions & 13 deletions README.md
Expand Up @@ -93,19 +93,22 @@ python ${FILE_NAME.py} -i ${INPUT_FILE} -o ${OUTPUT_FILE} -b ${ONCOKB_API_TOKEN}


## Columns added in the annotation files
| Column | Possible Values | Description |
|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| GENE_IN_ONCOKB | TRUE, FALSE | Whether the gene has been curated by the OncoKB Team |
| VARIANT_IN_ONCOKB | TRUE, FALSE | Whether the variant has been curated by the OncoKB Team. Note: when a variant does not exist, it may still have annotations. |
| MUTATION_EFFECT | Gain-of-function, Likely Gain-of-function, Loss-of-function, Likely Loss-of-function, Switch-of-function, Likely Switch-of-function, Neutral, Likely Neutral, Inconclusive, Unknown | The biological effect of a mutation/alteration on the protein function that gives rise to changes in the biological properties of cells expressing the mutant/altered protein compared to cells expressing the wildtype protein. |
| ONCOGENIC | Oncogenic, Likely Oncogenic, Likely Neutral, Inconclusive, Unknown, Resistance | In OncoKB, “oncogenic” is defined as “referring to the ability to induce or cause cancer” as described in the second edition of The Biology of Cancer by Robert Weinberg (2014). |
| LEVEL_* | Therapeutic implications | The leveled therapeutic implications |
| HIGHEST_LEVEL | LEVEL_1, LEVEL_2, LEVEL_3A, LEVEL_3B, LEVEL_4, LEVEL_R1, LEVEL_R2 | The highest level of evidence for therapeutic implications |
| CITATIONS | PMID, Abstract, Website Link | All citations related to a mutation/alteration |
| LEVEL_Dx* | Tumor type the level of evidence is assigned to | The leveled diagnostic implications |
| HIGHEST_DX_LEVEL | LEVEL_Dx1, LEVEL_Dx2, LEVEL_Dx3 | The highest level of evidence for diagnostic implications |
| LEVEL_Px* | Tumor type the level of evidence is assigned to | The leveled prognostic implications |
| HIGHEST_PX_LEVEL | LEVEL_Px1, LEVEL_Px2, LEVEL_Px3 | The highest level of evidence for prognostic implications |
| Column | Possible Values | Description |
|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| GENE_IN_ONCOKB | TRUE, FALSE | Whether the gene has been curated by the OncoKB Team |
| VARIANT_IN_ONCOKB | TRUE, FALSE | Whether the variant has been curated by the OncoKB Team. Note: a variant that does not exist in OncoKB may still have annotations. |
| MUTATION_EFFECT | Gain-of-function, Likely Gain-of-function, Loss-of-function, Likely Loss-of-function, Switch-of-function, Likely Switch-of-function, Neutral, Likely Neutral, Inconclusive, Unknown | The biological effect of a mutation/alteration on the protein function that gives rise to changes in the biological properties of cells expressing the mutant/altered protein compared to cells expressing the wildtype protein. |
| MUTATION_EFFECT_CITATIONS | PMID, Abstract, Website Link | All citations related to the biological effect |
| ONCOGENIC | Oncogenic, Likely Oncogenic, Likely Neutral, Inconclusive, Unknown, Resistance | In OncoKB, “oncogenic” is defined as “referring to the ability to induce or cause cancer” as described in the second edition of The Biology of Cancer by Robert Weinberg (2014). |
| LEVEL_* | Therapeutic implications | The leveled therapeutic implications |
| HIGHEST_LEVEL | LEVEL_1, LEVEL_2, LEVEL_3A, LEVEL_3B, LEVEL_4, LEVEL_R1, LEVEL_R2 | The highest level of evidence for therapeutic implications |
| TX_CITATIONS | PMID, Abstract, Website Link | All citations related to therapeutic implications |
| LEVEL_Dx* | Tumor type the level of evidence is assigned to | The leveled diagnostic implications |
| HIGHEST_DX_LEVEL | LEVEL_Dx1, LEVEL_Dx2, LEVEL_Dx3 | The highest level of evidence for diagnostic implications |
| DX_CITATIONS | PMID, Abstract, Website Link | All citations related to diagnostic implications |
| LEVEL_Px* | Tumor type the level of evidence is assigned to | The leveled prognostic implications |
| HIGHEST_PX_LEVEL | LEVEL_Px1, LEVEL_Px2, LEVEL_Px3 | The highest level of evidence for prognostic implications |
| PX_CITATIONS | PMID, Abstract, Website Link | All citations related to prognostic implications |

## Questions?
The best way to reach us is to email contact@oncokb.org, so that all of our team members can help.

0 comments on commit 6c06049

Please sign in to comment.