Skip to content

Commit

Permalink
Fix: VCFs need semicolon delim FILTER data, and header lines
Browse files Browse the repository at this point in the history
  • Loading branch information
ckandoth committed Aug 8, 2017
1 parent a507111 commit 2f82fa4
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 6 deletions.
8 changes: 6 additions & 2 deletions maf2vcf.pl
Expand Up @@ -53,7 +53,7 @@

# Before anything, let's parse the headers of this supposed "MAF-like" file and do some checks
my $maf_fh = IO::File->new( $input_maf ) or die "ERROR: Couldn't open input MAF: $input_maf!\n";
my ( %uniq_regions, %flanking_bps, @tn_pair, %col_idx, $header_line );
my ( %uniq_regions, %filter_tags, %flanking_bps, @tn_pair, %col_idx, $header_line );
while( my $line = $maf_fh->getline ) {

# If the file uses Mac OS 9 newlines, quit with an error
Expand Down Expand Up @@ -89,10 +89,12 @@
( %col_idx ) or die "ERROR: Couldn't find a header line (must start with Hugo_Symbol, Chromosome, or Tumor_Sample_Barcode): $input_maf\n";

# For each variant in the MAF, parse out the locus for running samtools faidx later
my ( $chr, $pos, $ref ) = map{ my $c = lc; ( defined $col_idx{$c} ? $cols[$col_idx{$c}] : "" )} qw( Chromosome Start_Position Reference_Allele );
my ( $chr, $pos, $ref, $filter ) = map{ my $c = lc; ( defined $col_idx{$c} ? $cols[$col_idx{$c}] : "" )} qw( Chromosome Start_Position Reference_Allele FILTER );
$ref =~ s/^(\?|-|0)+$//; # Blank out the dashes (or other weird chars) used with indels
my $region = "$chr:" . ( $pos - 1 ) . "-" . ( $pos + length( $ref ));
$uniq_regions{$region} = 1;
# Also track the unique FILTER tags seen, so we can construct VCF header lines for each
map{ $filter_tags{$_} = 1 unless( $_ eq "PASS" or $_ eq "." )} split( /,|;/, $filter );
}
$maf_fh->close;

Expand Down Expand Up @@ -152,6 +154,7 @@
$tn_vcf{$vcf_file} .= "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
$tn_vcf{$vcf_file} .= "##FORMAT=<ID=AD,Number=G,Type=Integer,Description=\"Allelic depths of REF and ALT(s) in the order listed\">\n";
$tn_vcf{$vcf_file} .= "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Total read depth across this site\">\n";
$tn_vcf{$vcf_file} .= "##FILTER=<ID=$_,Description=\"\">\n" foreach ( sort keys %filter_tags );
$tn_vcf{$vcf_file} .= "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t$t_id\t$n_id\n";
}

Expand Down Expand Up @@ -304,6 +307,7 @@
$vcf_fh->print( "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n" );
$vcf_fh->print( "##FORMAT=<ID=AD,Number=G,Type=Integer,Description=\"Allelic Depths of REF and ALT(s) in the order listed\">\n" );
$vcf_fh->print( "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">\n" );
$vcf_fh->print( "##FILTER=<ID=$_,Description=\"\">\n" ) foreach ( sort keys %filter_tags );
$vcf_fh->print( "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" . join("\t", @vcf_cols) . "\n" );

# Write each variant into the multi-sample VCF
Expand Down
4 changes: 2 additions & 2 deletions vcf2maf.pl
Expand Up @@ -769,15 +769,15 @@ sub GetBiotypePriority {
# Copy FILTER from input VCF, and tag calls with high allele counts in any ExAC subpopulation
my $subpop_count = 0;
# Remove existing common_variant tags from input, so it's redefined by our criteria here
$filter = join( ",", grep{ $_ ne "common_variant" } split( ",", $filter ));
$filter = join( ";", grep{ $_ ne "common_variant" } split( /,|;/, $filter ));
foreach my $subpop ( qw( AFR AMR EAS FIN NFE OTH SAS )) {
if( $maf_line{"ExAC_AC_AN_$subpop"} ) {
my ( $subpop_ac ) = split( "/", $maf_line{"ExAC_AC_AN_$subpop"} );
$subpop_count++ if( $subpop_ac > $max_filter_ac );
}
}
if( $subpop_count > 0 ) {
$filter = (( $filter eq "PASS" or $filter eq "." or !$filter ) ? "common_variant" : "$filter,common_variant" );
$filter = (( $filter eq "PASS" or $filter eq "." or !$filter ) ? "common_variant" : "$filter;common_variant" );
}
$maf_line{FILTER} = $filter;

Expand Down
4 changes: 2 additions & 2 deletions vcf2vcf.pl
Expand Up @@ -300,15 +300,15 @@
# Add more filter tags to the FILTER field, if --add-filters was specified
if( $add_filters ) {
my %tags = ();
map{ $tags{$_} = 1 unless( $_ eq "PASS" or $_ eq "." )} split( ",", $filter );
map{ $tags{$_} = 1 unless( $_ eq "PASS" or $_ eq "." )} split( /,|;/, $filter );
$tags{LowTotalDepth} = 1 if(( $tum_info{DP} ne "." and $tum_info{DP} < $min_tum_depth ) or ( $nrm_info{DP} ne "." and $nrm_info{DP} < $min_nrm_depth ));
my @tum_depths = split( /,/, $tum_info{AD} );
my @nrm_depths = split( /,/, $nrm_info{AD} );
my $tum_alt_depth = $tum_depths[$var_allele_idx];
my $nrm_alt_depth = $nrm_depths[$var_allele_idx];
$tags{LowTumorSupport} = 1 if( $tum_alt_depth ne "." and $tum_alt_depth < $min_tum_support );
$tags{HighNormalSupport} = 1 if( $nrm_alt_depth ne "." and $nrm_alt_depth > $max_nrm_support );
my $tags_to_add = join( ",", sort keys %tags );
my $tags_to_add = join( ";", sort keys %tags );
$filter = ( $tags_to_add ? $tags_to_add : $filter );
}

Expand Down

0 comments on commit 2f82fa4

Please sign in to comment.