diff --git a/AnnotatorCore.py b/AnnotatorCore.py index 916c9fa..89d9bd0 100644 --- a/AnnotatorCore.py +++ b/AnnotatorCore.py @@ -202,7 +202,7 @@ def setsampleidsfileterfile(f): GC_VAR_ALLELE_1_HEADER = 'TUMOR_SEQ_ALLELE1' GC_VAR_ALLELE_2_HEADER = 'TUMOR_SEQ_ALLELE2' GENOMIC_CHANGE_HEADERS = [GC_CHROMOSOME_HEADER, GC_START_POSITION_HEADER, GC_END_POSITION_HEADER, GC_REF_ALLELE_HEADER, - GC_VAR_ALLELE_1_HEADER, GC_VAR_ALLELE_2_HEADER] + GC_VAR_ALLELE_2_HEADER] # columns for structural variant annotation SV_GENEA_HEADER = ['SITE1_GENE', 'GENEA', 'GENE1', 'SITE1_HUGO_SYMBOL'] @@ -1504,13 +1504,17 @@ def getimplications(oncokbdata, implication_type, levels, implications): class GenomicChangeQuery: def __init__(self, chromosome, start, end, ref_allele, var_allele, cancertype, reference_genome=None): + if chromosome is not None: + chromosome = chromosome.strip() + if chromosome.startswith('chr'): + chromosome = chromosome[3:] self.genomicLocation = ','.join([chromosome, start, end, ref_allele, var_allele]) self.tumorType = cancertype if reference_genome is not None: self.referenceGenome = reference_genome.value def __repr__(self): - return " ".join([self.genomicLocation, self.tumorType, self.referenceGenome]) + return " ".join([self.genomicLocation, self.tumorType]) class CNAQuery: diff --git a/README.md b/README.md index beaa2fc..a86e1c8 100644 --- a/README.md +++ b/README.md @@ -67,10 +67,19 @@ OncoKB™ MafAnnotator supports annotating the alteration with HGVSp, HGVSp_Shor The acceptable values are HGVSp_Short, HGVSp, HGVSg and Genomic_Change(case-insensitive). Please see data/example.sh for examples. If you do not specify query type, the MafAnnotator will try to figure out the query type based on the headers. 
-For HGVSp_Short, the annotator takes alteration from the column HGVSp_Short or Alteration -For HGVSp, the annotator takes alteration from the column HGVSp or Alteration -For HGVSg, the annotator takes alteration from the column HGVSg or Alteration -For Genomic_Change, the annotator takes genomic change from columns Chromosome, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele1 and Tumor_Seq_Allele2. +#### For HGVSp_Short +The annotator takes alteration from the column HGVSp_Short or Alteration. + +#### For HGVSp +The annotator takes alteration from the column HGVSp or Alteration. + +#### For HGVSg +The annotator takes alteration from the column HGVSg or Alteration. + +#### For Genomic_Change +The annotator takes genomic change from columns Chromosome, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele1 (optional) and Tumor_Seq_Allele2. +Typically Tumor_Seq_Allele1 is the reference allele and Tumor_Seq_Allele2 is the variant allele, which is why Tumor_Seq_Allele1 is optional. +The annotator uses a value from either column only if it differs from Reference_Allele; Tumor_Seq_Allele2 has higher priority than Tumor_Seq_Allele1. Annotation with Genomic_Change is relatively slow. We need to annotate the variant first with GenomeNexus(https://www.genomenexus.org/) then get annotation one by one. There is a plan to improve this method. If you are annotating a lot of data, please prioritize using other query type if applicable.