Skip to content

Commit

Permalink
Handle another type of shitty input MAF
Browse files Browse the repository at this point in the history
  • Loading branch information
ckandoth committed Sep 19, 2015
1 parent 3903971 commit 7dd5d96
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
8 changes: 6 additions & 2 deletions maf2vcf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@
# Any tumor allele that is still undefined falls back to the reference allele
foreach my $allele ( $al1, $al2 ) {
    $allele = $ref if( !defined $allele );
}

# When $al2 is "-", exchange the two alleles so that the "-" ends up in $al1. This covers
# a SNP in $al1 that we want to annotate, paired with an $al2 that is incorrectly "-"
if( $al2 eq "-" ) {
    ( $al1, $al2 ) = ( $al2, $al1 );
}

# To represent indels in VCF format, we need to fetch the preceding bp from a reference FASTA.
# Start from the plain string lengths, then zero out any allele that consists solely of "?",
# "-", or "0" characters
my ( $ref_len, $al1_len, $al2_len ) = ( length( $ref ), length( $al1 ), length( $al2 ));
$ref_len = 0 if( $ref =~ m/^(\?|-|0)+$/ );
$al1_len = 0 if( $al1 =~ m/^(\?|-|0)+$/ );
$al2_len = 0 if( $al2 =~ m/^(\?|-|0)+$/ );
if( $ref_len == 0 or $al1_len == 0 or $al2_len == 0 ) {
Expand Down Expand Up @@ -163,8 +166,9 @@
}

# Set tumor and normal genotypes (FORMAT tag GT in VCF)
# NOTE(review): this page is a diff rendered without +/- markers. Going by the hunk header
# (8 old lines, 9 new) the next two lines look like the removed pre-commit code, replaced by
# the three statements that follow them - confirm against the repository before reusing
my $t_gt = join( "/", $al_idx{$al1}, $al_idx{$al2} );
my $n_gt = join( "/", $al_idx{$n_al1}, $al_idx{$n_al2} );
my ( $t_gt, $n_gt ) = ( "0/1", "0/0" ); # Defaults, kept unless the conditions below hold
# Override the tumor default only when %al_idx maps $al1 to something other than "0"; note
# that the indexes are joined in the order $al2 then $al1
$t_gt = join( "/", $al_idx{$al2}, $al_idx{$al1} ) if( $al_idx{$al1} ne "0" );
# Override the normal default when either normal allele maps to something other than "0";
# again the indexes are joined in the order $n_al2 then $n_al1
$n_gt = join( "/", $al_idx{$n_al2}, $al_idx{$n_al1} ) if( $al_idx{$n_al1} ne "0" or $al_idx{$n_al2} ne "0" );

# Create the VCF's comma-delimited ALT field that must list all non-REF (variant) alleles
my $alt = join( ",", @alleles[1..$#alleles] );
Expand Down
10 changes: 5 additions & 5 deletions vcf2maf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,11 @@ sub GetBiotypePriority {
# Figure out the appropriate start/stop loci and variant type/allele to report in the MAF
my $start = my $stop = my $var_type = "";
my ( $ref_length, $var_length ) = ( length( $ref ), length( $var ));
# Remove any prefixed reference bps from all alleles, using "-" for simple indels. Stop as soon
# as either allele is empty or both alleles are identical: once two identical alleles have been
# trimmed down to "-", their first characters always match, so without these guards the loop
# would never terminate
while( $ref and $var and $ref ne $var and substr( $ref, 0, 1 ) eq substr( $var, 0, 1 )) {
    # Drop the shared leading bp from every allele; an allele left empty is written as "-"
    ( $ref, $var, @alleles ) = map{$_ = substr( $_, 1 ); ( $_ ? $_ : "-" )} ( $ref, $var, @alleles );
    # Keep the cached lengths and the position in step with the trimmed alleles
    --$ref_length; --$var_length; ++$pos;
}
# Handle SNPs, DNPs, TNPs, or anything larger (ONP)
if( $ref_length == $var_length ) {
( $start, $stop ) = ( $pos, $pos + $var_length - 1 );
Expand All @@ -504,11 +509,6 @@ sub GetBiotypePriority {
}
# Handle all indels, including those complex ones which contain substitutions
elsif( $ref_length != $var_length ) {
# Remove any prefixed reference bps from all alleles, using "-" for simple indels
while( substr( $ref, 0, 1 ) eq substr( $var, 0, 1 )) {
( $ref, $var, @alleles ) = map{$_ = substr( $_, 1 ); ( $_ ? $_ : "-" )} ( $ref, $var, @alleles );
--$ref_length; --$var_length; ++$pos;
}
if( $ref_length < $var_length ) { # Handle insertions, and the special case for complex ones
( $start, $stop ) = ( $pos - 1, ( $ref eq "-" ? $pos : $pos + $ref_length - 1 ));
$var_type = "INS";
Expand Down

0 comments on commit 7dd5d96

Please sign in to comment.