Skip to content

Commit

Permalink
[feat,perf] Add support for non-binary REF/ALT and multiallelic input
Browse files Browse the repository at this point in the history
Add support for multiallelic input sites

Add --source option with possible values: "binary" (for binary ALT and REF in
input VCF) and "acgt" (default; for regular ALT and REF in input VCF).
Set the --source acgt as default.

Add fetchGl.c; small program to fetch gl values corresponding to specific
genotypes. Useful for testing. Usage: `misc/fetchGl -i infile.bcf -gt AT`

Add gtDiscordance doGQ 8 to get GQ values via second smallest PL value
assuming smallest PL has been rescaled to 0

[perf]

Performance improvement via reducing overhead, changed from calling poisson
sampler at persample persite to persite for all samples

Minor safety improvements via static arrays, macros for using
lookup tables safely

[chore]

Update check arg macro for inclusive and exclusive start end values
  • Loading branch information
isinaltinkaya committed Jan 29, 2024
1 parent 8f175d5 commit 8306de4
Show file tree
Hide file tree
Showing 26 changed files with 2,025 additions and 1,573 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -18,3 +18,4 @@ vgcore*
COMMIT_CHANGES

misc/gtDiscordance
misc/fetchGl
172 changes: 2 additions & 170 deletions bcf_utils.cpp
Expand Up @@ -499,175 +499,6 @@ void simRecord::add_tags() {
}


// reset the reused arrays by setting elements to initial values
// these were allocated during simRecord construction, and reused in
// simulating each record
void simRecord::reset_rec_objects() {


// ACGT: A, C, G, T, <*> (non-ref)
// assumption: MAX_NALLELES == 5
this->acgt2alleles[0] = -1;
this->acgt2alleles[1] = -1;
this->acgt2alleles[2] = -1;
this->acgt2alleles[3] = -1;
this->acgt2alleles[4] = -1;

this->alleles2acgt[0] = -1;
this->alleles2acgt[1] = -1;
this->alleles2acgt[2] = -1;
this->alleles2acgt[3] = -1;
this->alleles2acgt[4] = -1;

this->nAlleles = 0;
this->nAllelesObserved = 0;

//TODO check why removing this has no effect
free(this->gt_arr);
this->gt_arr = NULL;

// -> reset INFO_ACGT arrays (size=4)
// unroll
this->acgt_info_ad_arr[0] = 0;
this->acgt_info_ad_arr[1] = 0;
this->acgt_info_ad_arr[2] = 0;
this->acgt_info_ad_arr[3] = 0;

this->acgt_sum_taildist[0] = 0.0;
this->acgt_sum_taildist[1] = 0.0;
this->acgt_sum_taildist[2] = 0.0;
this->acgt_sum_taildist[3] = 0.0;

this->acgt_sum_taildist_sq[0] = 0.0;
this->acgt_sum_taildist_sq[1] = 0.0;
this->acgt_sum_taildist_sq[2] = 0.0;
this->acgt_sum_taildist_sq[3] = 0.0;

// -> reset FMT_ACGT arrays (size=4*nSamples)
for (int i = 0; i < nSamples * 4; ++i) {
this->acgt_fmt_ad_arr[i] = 0;
this->acgt_fmt_adf_arr[i] = 0;
this->acgt_fmt_adr_arr[i] = 0;
this->acgt_fmt_qsum_arr[i] = 0;
this->acgt_fmt_qsum_sq_arr[i] = 0;

}

// 2 * base + which_strand
this->acgt_n_bases_forI16[0] = 0;
this->acgt_n_bases_forI16[1] = 0;
this->acgt_n_bases_forI16[2] = 0;
this->acgt_n_bases_forI16[3] = 0;
this->acgt_n_bases_forI16[4] = 0;
this->acgt_n_bases_forI16[5] = 0;
this->acgt_n_bases_forI16[6] = 0;
this->acgt_n_bases_forI16[7] = 0;

int size = -1;


if (this->fmt_dp_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[FMT_DP].n];
for (int i = 0;i < size;++i) {
this->fmt_dp_arr[i] = 0;
}
}

if (this->info_dp_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[INFO_DP].n];
for (int i = 0;i < size;++i) {
this->info_dp_arr[i] = 0;
}
}

// GL PL GP share the same size
size = this->max_size_bcf_tag_number[bcf_tags[GL].n];
for (int i = 0;i < size;++i) {
this->gl_arr[i] = -0.0;

if (NULL != this->gp_arr) {
this->gp_arr[i] = MINGP;
}

if (NULL != this->pl_arr) {
this->pl_arr[i] = MAXPL;
}
}


if (this->qs_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[QS].n];
for (int i = 0;i < size;++i) {
this->qs_arr[i] = 0.0;
}
}


if (this->info_ad_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[INFO_AD].n];
for (int i = 0;i < size;++i) {
this->info_ad_arr[i] = 0;
}
}

if (this->info_adf_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[INFO_ADF].n];
for (int i = 0;i < size;++i) {
this->info_adf_arr[i] = 0;
}
}

if (this->info_adr_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[INFO_ADR].n];
for (int i = 0;i < size;++i) {
this->info_adr_arr[i] = 0;
}
}

if (this->fmt_ad_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[FMT_AD].n];
for (int i = 0;i < size;++i) {
this->fmt_ad_arr[i] = 0;
}
}

if (this->fmt_adf_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[FMT_ADF].n];
for (int i = 0;i < size;++i) {
this->fmt_adf_arr[i] = 0;
}
}

if (this->fmt_adr_arr != NULL) {
size = this->max_size_bcf_tag_number[bcf_tags[FMT_ADR].n];
for (int i = 0;i < size;++i) {
this->fmt_adr_arr[i] = 0;
}
}

if (this->i16_arr != NULL) {
// unroll
this->i16_arr[0] = 0.0;
this->i16_arr[1] = 0.0;
this->i16_arr[2] = 0.0;
this->i16_arr[3] = 0.0;
this->i16_arr[4] = 0.0;
this->i16_arr[5] = 0.0;
this->i16_arr[6] = 0.0;
this->i16_arr[7] = 0.0;
this->i16_arr[8] = 0.0;
this->i16_arr[9] = 0.0;
this->i16_arr[10] = 0.0;
this->i16_arr[11] = 0.0;
this->i16_arr[12] = 0.0;
this->i16_arr[13] = 0.0;
this->i16_arr[14] = 0.0;
this->i16_arr[15] = 0.0;
}


this->alleles.l = 0;
}

void simRecord::set_hdr(bcf_hdr_t* in_hdr) {

Expand Down Expand Up @@ -1159,6 +990,7 @@ void gvcfData_destroy(gvcfData* gvcfd) {
}



bcf_tag_t bcf_tags[] = {
/* *********************************************************************** *
* INFO Describe the overall variation at site, one value array
Expand Down Expand Up @@ -1405,4 +1237,4 @@ bcf_tag_t bcf_tags[] = {
"alleles\">",
},

};
};

0 comments on commit 8306de4

Please sign in to comment.