Skip to content

Commit

Permalink
Use interval start as position for Ensembl VEP ANN attributes.
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Oct 26, 2020
1 parent e63137b commit 2d9f1c8
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
Expand Up @@ -83,6 +83,18 @@ private[adam] object TranscriptEffectConverter extends Serializable with Logging
s.split("&").map(MESSAGES.get(_)).toList.flatten
}

/**
 * Parse a position, tolerating an interval where a position is expected.
 *
 * Ensembl VEP incorrectly supplies an interval (e.g. "4-5") instead of a position
 * for some attributes; in these cases use the interval start as the position.
 * A leading minus sign is treated as part of the number rather than as the
 * interval separator, so "-5" parses as -5 and "-5-3" as -5.
 *
 * @param s position or interval
 * @return position or interval start
 * @throws NumberFormatException if the position or interval start is not an integer
 */
private def positionOrIntervalStart(s: String): Int = {
  // Search from index 1 so that a leading '-' stays attached to the number.
  val separator = s.indexOf('-', 1)
  val start = if (separator < 0) s else s.substring(0, separator)
  Integer.parseInt(start)
}

/**
* Split a single or fractional value into optional numerator and denominator values.
*
Expand All @@ -96,8 +108,8 @@ private[adam] object TranscriptEffectConverter extends Serializable with Logging
val tokens = s.split("/")
tokens.length match {
case 0 => (None, None)
case 1 => (Some(Integer.parseInt(tokens(0))), None)
case _ => (Some(Integer.parseInt(tokens(0))), Some(Integer.parseInt(tokens(1))))
case 1 => (Some(positionOrIntervalStart(tokens(0))), None)
case _ => (Some(positionOrIntervalStart(tokens(0))), Some(Integer.parseInt(tokens(1))))
}
}

Expand Down
Expand Up @@ -35,6 +35,7 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
// Variation of VALID in which the field holding "453" is replaced by "not a number".
final val INVALID_NUMBER = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|not a number|"
// Variation of VALID in which the fraction "1/2" is replaced by "not a number/2".
final val INVALID_FRACTION = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|not a number/2|c.-485C>T|||4|1/42|453|"
// Reference ANN attribute string that the neighbouring fixtures are variations of.
final val VALID = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|453|"
// Variation of VALID in which "4" is replaced by "4-5/420": an interval where a
// position is expected, in the style of Ensembl VEP output.
final val VEP_POSITION = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4-5/420|1/42|453|"
// Variation of VALID whose first field is "A" instead of "T".
final val DIFFERENT_ALT = "A|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|453|"

var variant: Variant = null
Expand Down Expand Up @@ -170,6 +171,16 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
})
}

// An interval ("4-5") where a position is expected must still parse under
// STRICT validation, with the interval start used as the position.
test("parse VCF ANN attribute with Ensembl VEP position attribute") {
  val effects = TranscriptEffectConverter.parseAnn(Seq(VEP_POSITION), ValidationStringency.STRICT)
  assert(effects.size == 1)

  val effect = effects.head
  assert(effect.getAlternateAllele == "T")
  // "4-5/420": the interval start is the position, the denominator is the length.
  assert(effect.getCdsPosition == 4)
  assert(effect.getCdsLength == 420)
}

test("convert to transcript effect from null VCF ANN attribute in variant context") {
when(variantContext.getAttributeAsList("ANN")).thenReturn(null)

Expand Down

0 comments on commit 2d9f1c8

Please sign in to comment.