diff --git a/src/AlleleParser.cpp b/src/AlleleParser.cpp index ca5d67e4..8e47a6aa 100644 --- a/src/AlleleParser.cpp +++ b/src/AlleleParser.cpp @@ -1266,19 +1266,16 @@ Allele AlleleParser::makeAllele(RegisteredAlignment& ra, // a dangerous game int start = pos - currentSequenceStart; double minEntropy = parameters.minRepeatEntropy; - // check first that' wer'e actually ina repeat... TODO - //cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, " << endl; while (minEntropy > 0 && // ignore if turned off - repeatRightBoundary - currentSequenceStart < currentSequence.size() && //guard + // don't run off the end of the current sequence + repeatRightBoundary - currentSequenceStart < currentSequence.size() && + // there is no point in going past the alignment end + // because we won't make a haplotype call unless we have a covering observation from a read + repeatRightBoundary < alignment.ENDPOSITION && entropy(currentSequence.substr(start, repeatRightBoundary - pos)) < minEntropy) { - //cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, "; - //cerr << "increasing rought boundary to "; ++repeatRightBoundary; - //cerr << repeatRightBoundary << endl; } - // now we - //cachedRepeatCounts[pos] = repeatCounts(pos - currentSequenceStart, currentSequence, 12); // edge case, the indel is an insertion and matches the reference to the right // this means there is a repeat structure in the read, but not the ref if (currentSequence.substr(pos - currentSequenceStart, length) == readSequence) { @@ -2050,7 +2047,7 @@ void AlleleParser::updateAlignmentQueue(long int position, //cerr << "parameters capcoverage " << parameters.capCoverage << " " << rq.size() << endl; if (considerAlignment) { // and insert the registered alignment into that deque - rq.push_front(RegisteredAlignment(currentAlignment, parameters)); + rq.push_front(RegisteredAlignment(currentAlignment)); RegisteredAlignment& ra = rq.front(); registerAlignment(currentAlignment, ra, sampleName, sequencingTech); // backtracking if we have too many mismatches @@ -2084,7 +2081,7 @@ void AlleleParser::removeRegisteredAlignmentsOverlappingPosition(long unsigned i set allelesToErase; while (f != registeredAlignments.end()) { for (deque::iterator d = f->second.begin(); d != f->second.end(); ++d) { - if (d->start >= pos && d->end > pos) { + if (d->start <= pos && d->end > pos) { alignmentsToErase[f->first].insert(d); for (vector::iterator a = d->alleles.begin(); a != d->alleles.end(); ++a) { allelesToErase.insert(&*a); diff --git a/src/AlleleParser.h b/src/AlleleParser.h index 9e700710..89663aff 100644 --- a/src/AlleleParser.h +++ b/src/AlleleParser.h @@ -57,10 +57,8 @@ class RegisteredAlignment { int snpCount; int indelCount; int alleleTypes; - Parameters parameters; - RegisteredAlignment(BAMALIGN& alignment, Parameters parameters) - //: alignment(alignment) + RegisteredAlignment(BAMALIGN& alignment) : start(alignment.POSITION) , end(alignment.ENDPOSITION) , refid(alignment.REFID) @@ -69,7 +67,6 @@ class RegisteredAlignment { , snpCount(0) , indelCount(0) , alleleTypes(0) - , parameters(parameters) { FILLREADGROUP(readgroup, alignment); } diff --git a/test/t/01_call_variants.t b/test/t/01_call_variants.t index 40cb02cb..0756b51a 100755 --- a/test/t/01_call_variants.t +++ b/test/t/01_call_variants.t @@ -119,5 +119,5 @@ samtools view -h tiny/NA12878.chr22.tiny.bam | sed s/NA12878D_HiSeqX_R1.fastq.gz is $(freebayes -f tiny/q.fa -F 0.2 tiny/NA12878.chr22.tiny.bam x.sam -A <(echo 1 8; echo 2 13) | grep 'AN=21' | wc -l) 19 "the CNV map may be used to specify per-sample copy numbers" rm -f x.sam -is $(freebayes -f tiny/q.fa --skip-coverage 30 tiny/NA12878.chr22.tiny.bam | grep -v '^#' | wc -l) 27 "freebayes makes the expected number of calls when capping coverage" -is $(freebayes -f tiny/q.fa -g 30 tiny/NA12878.chr22.tiny.bam | vcfkeepinfo - DP | vcf2tsv | cut -f 8 | tail -n+2 | awk '$1 <= 30 { print }' | wc -l) 27 "all coverage capped calls are below the coverage threshold" +is $(freebayes -f tiny/q.fa --skip-coverage 30 tiny/NA12878.chr22.tiny.bam | grep -v '^#' | wc -l) 22 "freebayes makes the expected number of calls when capping coverage" +is $(freebayes -f tiny/q.fa -g 30 tiny/NA12878.chr22.tiny.bam | vcfkeepinfo - DP | vcf2tsv | cut -f 8 | tail -n+2 | awk '$1 <= 30 { print }' | wc -l) 22 "all coverage capped calls are below the coverage threshold"