Skip to content

Commit

Permalink
[feat,chore] Add qscore adjustment options, v0.5
Browse files Browse the repository at this point in the history
[feat]

Add options --adjust-qs and --adjust-by to allow users to enable/distable the
quality score readjustments with user specified values.

Add reporting runtime, start and finish time for users

[chore]

Remove deprecated code, update README.md
  • Loading branch information
isinaltinkaya committed Mar 1, 2024
1 parent 9dd8010 commit 1b08819
Show file tree
Hide file tree
Showing 11 changed files with 443 additions and 526 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -204,7 +204,7 @@ FLAGS := $(CPPFLAGS) $(CXXFLAGS)


# Versioning
VERSION = v0.4.3
VERSION = v0.5

ifneq ($(wildcard .git),)
VERSION := $(VERSION)-$(shell git describe --always)
Expand Down
200 changes: 99 additions & 101 deletions README.md

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions bcf_utils.cpp
Expand Up @@ -72,6 +72,16 @@ simRecord::simRecord(bcf_hdr_t* in_hdr) {
this->base_qScores[s][i] = -1;
}
}

if (PROGRAM_WILL_ADJUST_QS) {
this->adj_base_qScores = (int**)malloc(this->nSamples * sizeof(double*));
for (int s = 0;s < this->nSamples;++s) {
this->adj_base_qScores[s] = (int*)malloc(this->_nBasesPerSample * sizeof(int));
for (int i = 0;i < this->_nBasesPerSample;++i) {
this->adj_base_qScores[s][i] = -1;
}
}
}
}

if ((0 == args->preCalc) && (1 == args->usePreciseGlError)) {
Expand Down Expand Up @@ -277,6 +287,10 @@ simRecord::~simRecord() {
free(this->base_qScores[s]);
this->base_qScores[s] = NULL;
}
if (NULL != this->adj_base_qScores) {
free(this->adj_base_qScores[s]);
this->adj_base_qScores[s] = NULL;
}
free(this->bases[s]);
this->bases[s] = NULL;
}
Expand All @@ -288,6 +302,10 @@ simRecord::~simRecord() {
free(this->base_qScores);
this->base_qScores = NULL;
}
if (NULL != this->adj_base_qScores) {
free(this->adj_base_qScores);
this->adj_base_qScores = NULL;
}
free(this->bases);
this->bases = NULL;

Expand Down Expand Up @@ -623,11 +641,17 @@ void simRecord::expand_arrays(const int new_size) {
if (NULL != this->base_qScores) {
this->base_qScores[s] = (int*)realloc(this->base_qScores[s], this->_nBasesPerSample * sizeof(int));
}
if (NULL != this->adj_base_qScores) {
this->adj_base_qScores[s] = (int*)realloc(this->adj_base_qScores[s], this->_nBasesPerSample * sizeof(int));
}
this->bases[s] = (int*)realloc(this->bases[s], this->_nBasesPerSample * sizeof(int));
for (int i = old_nBasesPerSample;i < this->_nBasesPerSample;++i) {
if (NULL != this->base_qScores) {
this->base_qScores[s][i] = -1;
}
if (NULL != this->adj_base_qScores) {
this->adj_base_qScores[s][i] = -1;
}
this->bases[s][i] = -1;
}
}
Expand Down
1 change: 1 addition & 0 deletions bcf_utils.h
Expand Up @@ -123,6 +123,7 @@ typedef struct simRecord {
// \def base_qScores[nSamples][nBasesPerSample]
// if (base_qScores==NULL); then use args->qScore instead
int** base_qScores = NULL;
int** adj_base_qScores = NULL;

// \def acgtint_bases[nSamples][nBasesPerSample]
// int representation of base (A=0,C=1,G=2,T=3)
Expand Down
246 changes: 14 additions & 232 deletions gl_methods.cpp
@@ -1,24 +1,6 @@
#include "gl_methods.h"
#include "io.h"

extern glModel1Struct* glModel1;
glModel1Struct* glModel1_init(void) {
glModel1Struct* glModel1 = (glModel1Struct*)malloc(sizeof(glModel1Struct));
ASSERT(NULL != glModel1);

glModel1->errmod = errmod_init(1.0 - args->glModel1_theta);
ASSERT(NULL != glModel1->errmod);

return(glModel1);
}

void glModel1_destroy(glModel1Struct* glModel1) {
errmod_destroy(glModel1->errmod);
free(glModel1);
return;
}


void alleles_calculate_gls_log10_glModel2_fixedQScore(simRecord* sim) {

const double homT = args->preCalc->homT;
Expand Down Expand Up @@ -98,6 +80,9 @@ void alleles_calculate_gls_log10_glModel2_precise0(simRecord* sim) {
int ao;
int qs;

int** qScores = NULL;
qScores = (PROGRAM_WILL_ADJUST_QS_FOR_GL) ? sim->adj_base_qScores : sim->base_qScores;

for (int s = 0;s < sim->nSamples;s++) {
n_sim_reads = sim->fmt_dp_arr[s];

Expand All @@ -109,7 +94,8 @@ void alleles_calculate_gls_log10_glModel2_precise0(simRecord* sim) {

bo = sim->bases[s][read_i];

qs = sim->base_qScores[s][read_i];
qs = qScores[s][read_i];

homT = qScore_to_log10_gl[0][qs];
het = qScore_to_log10_gl[1][qs];
homF = qScore_to_log10_gl[2][qs];
Expand Down Expand Up @@ -253,6 +239,9 @@ void alleles_calculate_gls_log10_glModel1(simRecord* sim) {
int a1, a2, b1, b2;
int g = 0;

int** qScores = NULL;
qScores = (PROGRAM_WILL_ADJUST_QS_FOR_GL) ? sim->adj_base_qScores : sim->base_qScores;

// 5 * 5 = 25 full allele combinations
// for 5 alleles (A,C,G,T,<*>)
float fpls[25];
Expand All @@ -267,12 +256,12 @@ void alleles_calculate_gls_log10_glModel1(simRecord* sim) {
uint16_t ubases[n_sim_reads];

for (int i = 0;i < n_sim_reads;++i) {
qs = sim->base_qScores[s][i];
qs = qScores[s][i];
ubases[i] = qs << 5 | sim->bases[s][i];
}

// 5 alleles (A,C,G,T,<*>)
errmod_cal(glModel1->errmod, n_sim_reads, 5, ubases, fpls);
errmod_cal(args->gl1errmod, n_sim_reads, 5, ubases, fpls);

max = NEG_INF;
g = 0;
Expand Down Expand Up @@ -324,6 +313,8 @@ void alleles_calculate_gls_log10_glModel1_fixedQScore(simRecord* sim) {
// for 5 alleles (A,C,G,T,<*>)
float fpls[25];

int q5 = (PROGRAM_WILL_ADJUST_QS_FOR_GL) ? args->preCalc->adj_q5 : args->preCalc->q5;

for (int s = 0;s < sim->nSamples;s++) {
n_sim_reads = sim->fmt_dp_arr[s];

Expand All @@ -333,11 +324,11 @@ void alleles_calculate_gls_log10_glModel1_fixedQScore(simRecord* sim) {

uint16_t ubases[n_sim_reads];
for (int i = 0;i < n_sim_reads;++i) {
ubases[i] = args->preCalc->q5 | sim->bases[s][i];
ubases[i] = q5 | sim->bases[s][i];
}

// 5 alleles (A,C,G,T,<*>)
errmod_cal(glModel1->errmod, n_sim_reads, 5, ubases, fpls);
errmod_cal(args->gl1errmod, n_sim_reads, 5, ubases, fpls);

max = NEG_INF;
g = 0;
Expand Down Expand Up @@ -374,212 +365,3 @@ void alleles_calculate_gls_log10_glModel1_fixedQScore(simRecord* sim) {
}

}


// --- BELOW : WITH OLD GL_VALS ARRAY --

// glModel2 using non-precise gl error probability using qScore to gl lut
void calculate_gls_log10_glModel2_precise0(const int bo, const int qScore, double* sample_acgt_gls) {

double homT = qScore_to_log10_gl[0][qScore];
double het = qScore_to_log10_gl[1][qScore];
double homF = qScore_to_log10_gl[2][qScore];

#if DEV==1
int n = 0; //DEBUG
#endif

// bcf_alelles2gt(bo, bo) = ((bo * (bo + 1) / (2 + bo));
sample_acgt_gls[(bo * (bo + 1) / 2 + bo)] += homT; // homozygotic hit

#if DEV==1
n++;
#endif


for (int b1 = 0;b1 < 5;++b1) {
if (b1 == bo) {
continue;
} else if (b1 > bo) {
// if b1>bo; then bcf_alleles2gt(b1,bo) == b1 * (b1 + 1) / 2 + bo
sample_acgt_gls[(b1 * (b1 + 1) / 2 + bo)] += het; // heterozygotic hit

#if DEV==1
n++;
#endif

for (int b2 = b1;b2 < 5;++b2) {
// always: b2>=b1
if (b2 != bo) {
// if b2>b1; then bcf_alleles2gt(b1,b2) == b2 * (b2 + 1) / 2 + b1
sample_acgt_gls[(b2 * (b2 + 1) / 2 + b1)] += homF; // non hit

#if DEV==1
n++;
#endif

}
}

} else if (b1 < bo) {
// if b1<bo; then bcf_alleles2gt(b1,bo) == bo * (bo + 1) / 2 + b1
sample_acgt_gls[(bo * (bo + 1) / 2 + b1)] += het; // heterozygotic hit

#if DEV==1
n++;
#endif

for (int b2 = b1;b2 < 5;++b2) {
// always: b2>=b1
if (b2 != bo) {
// if b2>b1; then bcf_alleles2gt(b1,b2) == b2 * (b2 + 1) / 2 + b1
sample_acgt_gls[(b2 * (b2 + 1) / 2 + b1)] += homF; // non hit

#if DEV==1
n++;
#endif

}
}
}
}

#if DEV==1
ASSERT(n == 15);
#endif

}


// glModel2 using precise gl error probability from const double e
void calculate_gls_log10_glModel2_precise1(const int bo, const double e, double* sample_acgt_gls) {

double homT = 0.0;
double het = 0.0;
double homF = 0.0;

if (0.0 == e) {
homT = 0.0;
het = -0.30103;
homF = NEG_INF;
} else {
homT = log10(1.0 - e);
het = log10((1.0 - e) / 2.0 + e / 6.0);
homF = log10(e / 3.0);
}

#if DEV==1
int n = 0; //DEBUG
#endif


// bcf_alelles2gt(bo, bo) = ((bo * (bo + 1) / (2 + bo));
sample_acgt_gls[(bo * (bo + 1) / 2 + bo)] += homT; // homozygotic hit

#if DEV==1
n++;
#endif


for (int b1 = 0;b1 < 5;++b1) {
if (b1 == bo) {
continue;
} else if (b1 > bo) {
// if b1>bo; then bcf_alleles2gt(b1,bo) == b1 * (b1 + 1) / 2 + bo
sample_acgt_gls[(b1 * (b1 + 1) / 2 + bo)] += het; // heterozygotic hit

#if DEV==1
n++;
#endif


for (int b2 = b1;b2 < 5;++b2) {
// always: b2>=b1
if (b2 != bo) {
// if b2>b1; then bcf_alleles2gt(b1,b2) == b2 * (b2 + 1) / 2 + b1
sample_acgt_gls[(b2 * (b2 + 1) / 2 + b1)] += homF; // non hit

#if DEV==1
n++;
#endif

}
}

} else if (b1 < bo) {
// if b1<bo; then bcf_alleles2gt(b1,bo) == bo * (bo + 1) / 2 + b1
sample_acgt_gls[(bo * (bo + 1) / 2 + b1)] += het; // heterozygotic hit

#if DEV==1
n++;
#endif


for (int b2 = b1;b2 < 5;++b2) {
// always: b2>=b1
if (b2 != bo) {
// if b2>b1; then bcf_alleles2gt(b1,b2) == b2 * (b2 + 1) / 2 + b1
sample_acgt_gls[(b2 * (b2 + 1) / 2 + b1)] += homF; // non hit

#if DEV==1
n++;
#endif

}
}
}
}

#if DEV==1
ASSERT(n == 15);
#endif

}

// glModel1 with fixed qScore
void calculate_gls_log10_glModel1(int* bases, int n_bases, float* gls, int* qScores, glModel1Struct* glModel1, simRecord* sim) {

uint16_t ubases[n_bases];

// 5 * 5 = 25 full allele combinations
// for 5 alleles (A,C,G,T,<*>)
float fpls[25];

int q;

for (int i = 0;i < n_bases;++i) {
q = qScores[i];
ubases[i] = q << 5 | bases[i];
}

// 5 alleles (A,C,G,T,<*>)
errmod_cal(glModel1->errmod, n_bases, 5, ubases, fpls);

float max = NEG_INF;
int a1, a2;
int b1, b2;
int g = 0;

for (a2 = 0; a2 < sim->nAlleles; ++a2) {
b2 = sim->alleles2acgt[a2];
for (a1 = 0; a1 <= a2; ++a1) {
b1 = sim->alleles2acgt[a1];

gls[g] = ((-1.0 * fpls[(b1 * 5 + b2)]) / 10.0);

if (gls[g] > max) {
max = gls[g];
}

++g;
}
}

DEVASSERT(g == sim->nGenotypes);

for (int i = 0;i < g;++i) {
gls[i] -= max;
}

}

9 changes: 0 additions & 9 deletions gl_methods.h
Expand Up @@ -3,15 +3,6 @@

#include "bcf_utils.h"

typedef struct glModel1Struct glModel1Struct;

struct glModel1Struct {
errmod_t* errmod;
};
extern glModel1Struct* glModel1;
glModel1Struct* glModel1_init(void);
void glModel1_destroy(glModel1Struct* glModel1);

void alleles_calculate_gls_log10_glModel2_fixedQScore(simRecord* sim);
void alleles_calculate_gls_log10_glModel2_precise0(simRecord* sim);
void alleles_calculate_gls_log10_glModel2_precise1(simRecord* sim);
Expand Down

0 comments on commit 1b08819

Please sign in to comment.