Skip to content

Commit

Permalink
Merge pull request #545 from ekg/skip-coverage
Browse files Browse the repository at this point in the history
Skip coverage fixes
  • Loading branch information
ekg committed Jun 3, 2019
2 parents 7e863ee + ffcee0d commit 4cde5d3
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 16 deletions.
17 changes: 7 additions & 10 deletions src/AlleleParser.cpp
Expand Up @@ -1266,19 +1266,16 @@ Allele AlleleParser::makeAllele(RegisteredAlignment& ra,
// a dangerous game
int start = pos - currentSequenceStart;
double minEntropy = parameters.minRepeatEntropy;
// check first that' wer'e actually ina repeat... TODO
//cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, " << endl;
while (minEntropy > 0 && // ignore if turned off
repeatRightBoundary - currentSequenceStart < currentSequence.size() && //guard
// don't run off the end of the current sequence
repeatRightBoundary - currentSequenceStart < currentSequence.size() &&
// there is no point in going past the alignment end
// because we won't make a haplotype call unless we have a covering observation from a read
repeatRightBoundary < alignment.ENDPOSITION &&
entropy(currentSequence.substr(start, repeatRightBoundary - pos)) < minEntropy) {
//cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, ";
//cerr << "increasing rought boundary to ";
++repeatRightBoundary;
//cerr << repeatRightBoundary << endl;
}

// now we
//cachedRepeatCounts[pos] = repeatCounts(pos - currentSequenceStart, currentSequence, 12);
// edge case, the indel is an insertion and matches the reference to the right
// this means there is a repeat structure in the read, but not the ref
if (currentSequence.substr(pos - currentSequenceStart, length) == readSequence) {
Expand Down Expand Up @@ -2050,7 +2047,7 @@ void AlleleParser::updateAlignmentQueue(long int position,
//cerr << "parameters capcoverage " << parameters.capCoverage << " " << rq.size() << endl;
if (considerAlignment) {
// and insert the registered alignment into that deque
rq.push_front(RegisteredAlignment(currentAlignment, parameters));
rq.push_front(RegisteredAlignment(currentAlignment));
RegisteredAlignment& ra = rq.front();
registerAlignment(currentAlignment, ra, sampleName, sequencingTech);
// backtracking if we have too many mismatches
Expand Down Expand Up @@ -2084,7 +2081,7 @@ void AlleleParser::removeRegisteredAlignmentsOverlappingPosition(long unsigned i
set<Allele*> allelesToErase;
while (f != registeredAlignments.end()) {
for (deque<RegisteredAlignment>::iterator d = f->second.begin(); d != f->second.end(); ++d) {
if (d->start >= pos && d->end > pos) {
if (d->start <= pos && d->end > pos) {
alignmentsToErase[f->first].insert(d);
for (vector<Allele>::iterator a = d->alleles.begin(); a != d->alleles.end(); ++a) {
allelesToErase.insert(&*a);
Expand Down
5 changes: 1 addition & 4 deletions src/AlleleParser.h
Expand Up @@ -57,10 +57,8 @@ class RegisteredAlignment {
int snpCount;
int indelCount;
int alleleTypes;
Parameters parameters;

RegisteredAlignment(BAMALIGN& alignment, Parameters parameters)
//: alignment(alignment)
RegisteredAlignment(BAMALIGN& alignment)
: start(alignment.POSITION)
, end(alignment.ENDPOSITION)
, refid(alignment.REFID)
Expand All @@ -69,7 +67,6 @@ class RegisteredAlignment {
, snpCount(0)
, indelCount(0)
, alleleTypes(0)
, parameters(parameters)
{
FILLREADGROUP(readgroup, alignment);
}
Expand Down
4 changes: 2 additions & 2 deletions test/t/01_call_variants.t
Expand Up @@ -119,5 +119,5 @@ samtools view -h tiny/NA12878.chr22.tiny.bam | sed s/NA12878D_HiSeqX_R1.fastq.gz
is $(freebayes -f tiny/q.fa -F 0.2 tiny/NA12878.chr22.tiny.bam x.sam -A <(echo 1 8; echo 2 13) | grep 'AN=21' | wc -l) 19 "the CNV map may be used to specify per-sample copy numbers"
rm -f x.sam
is $(freebayes -f tiny/q.fa --skip-coverage 30 tiny/NA12878.chr22.tiny.bam | grep -v '^#' | wc -l) 27 "freebayes makes the expected number of calls when capping coverage"
is $(freebayes -f tiny/q.fa -g 30 tiny/NA12878.chr22.tiny.bam | vcfkeepinfo - DP | vcf2tsv | cut -f 8 | tail -n+2 | awk '$1 <= 30 { print }' | wc -l) 27 "all coverage capped calls are below the coverage threshold"
is $(freebayes -f tiny/q.fa --skip-coverage 30 tiny/NA12878.chr22.tiny.bam | grep -v '^#' | wc -l) 22 "freebayes makes the expected number of calls when capping coverage"
is $(freebayes -f tiny/q.fa -g 30 tiny/NA12878.chr22.tiny.bam | vcfkeepinfo - DP | vcf2tsv | cut -f 8 | tail -n+2 | awk '$1 <= 30 { print }' | wc -l) 22 "all coverage capped calls are below the coverage threshold"

0 comments on commit 4cde5d3

Please sign in to comment.