/
getGeneExpression.cpp
120 lines (113 loc) · 4.49 KB
/
getGeneExpression.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
*
* Produce overall gene expression
*
*/
#include<cmath>
using namespace std;
#include "ArgumentParser.h"
#include "misc.h"
#include "PosteriorSamples.h"
#include "TranscriptInfo.h"
#include "common.h"
extern "C" int getGeneExpression(int *argc,char* argv[]){
string programDescription=
"Computes expression of whole genes.\n\
[samplesFile] should contain transposed MCMC samples which will be transformed into gene expression samples.";
// Set options {{{
ArgumentParser args(programDescription,"[samplesFile]",1);
args.addOptionS("t","trInfoFile","trInfoFileName",1,"Name of the transcript file.");
args.addOptionB("a","adjustByLength","adjust",0,"Adjust expression by transcripts length.");
args.addOptionB("","theta2rpkm","rpkm",0,"Transform transcript expression in theta to gene expression in RPKM.");
args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
args.addOptionB("l","log","log",0,"Output logged values.");
args.addOptionS("T","trMap","trMapFile",0,"Name of the file containing transcript to gene mapping.");
args.addOptionS("G","geneList","geneListFile",0,"Name of the file containing list of gene names (one for each transcript).");
args.addOptionB("","updateTrFile","updateTrFile",0,"Update trInfoFile if new gene names were provided (with trMapFile or geneListFile).");
args.addOptionS("g","geneInfoFile","geneInfoFile",0,"Name of while to which gene information will be saved.");
if(!args.parse(*argc,argv))return 0;
if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
// }}}
bool doLog,doAdjust=args.flag("adjust")||args.flag("rpkm"),doRPKM=args.flag("rpkm");
doLog = ns_genes::getLog(args);
long N=0,M=0,G;
TranscriptInfo trInfo;
PosteriorSamples samples;
if(!ns_genes::prepareInput(args, &trInfo, &samples, &M, &N, &G))return 1;
if(!ns_genes::updateGenes(args, &trInfo, &G))return 1;
if(args.verb())messageF("Genes: %ld\n",G);
if(!ns_genes::checkGeneCount(G,M))return 1;
if(args.flag("updateTrFile") && (args.isSet("trMapFile") || args.isSet("geneListFile"))){
if(args.verb())message("Updating transcript info file with new gene names.\n");
if(!trInfo.writeInfo(args.getS("trInfoFileName"), true)){
if(args.verb())warning("Main: Updating trInfoFile failed.\n");
}
}
if(args.isSet("geneInfoFile")){
if(args.verb())message("Saving gene information into: %s.\n",args.getS("geneInfoFile").c_str());
if(!trInfo.writeGeneInfo(args.getS("geneInfoFile"))){
warning("Main: Writing gene information failed.\n");
}
}
ofstream outFile;
if(!ns_misc::openOutput(args, &outFile))return 1;;
// Write ouput header {{{
outFile<<"# from: "<<args.args()[0]<<"\n# samples of gene expression\n";
if(args.verbose)message("Genes will be ordered as they first appear in %s.\n",(args.getS("trInfoFileName")).c_str());
outFile<<"# Genes will be ordered as they first appear in "<<args.getS("trInfoFileName")<<"\n";
if(doRPKM)outFile<<"# data in RPKM\n";
if(doLog)outFile<<"# L \n";
outFile<<"# T (M rows,N cols)\n";
outFile<<"# G = M "<<G<<"\n# N "<<N<<endl;
// Set precision.
outFile.precision(9);
outFile<<scientific;
// }}}
vector< vector<double> > trs;
vector<long double> normals(N,0);
long double sum;
long i,j,g,gM,m;
if(doAdjust){
vector<double> tr(M);
if(args.verbose)message("Computing normalization constants, because of length adjustment.\n");
for(j=0;j<M;j++){
if(args.verbose)progressLog(j,M);
samples.getTranscript(j,tr);
for(i=0;i<N;i++)
normals[i] += tr[i]/trInfo.L(j);
}
}
if(args.verbose)message("Computing gene expression.\n");
for(g=0;g<G;g++){
if(args.verbose)progressLog(g,G);
gM = trInfo.getGtrs(g).size();
if((long)trs.size()<gM)trs.resize(gM);
for(j=0;j<gM;j++){
m = trInfo.getGtrs(g)[j];
samples.getTranscript( m , trs[j]);
}
for(i=0;i<N;i++){
sum = 0;
for(j=0;j<gM;j++){
if(doAdjust&&(normals[i]>0)){
m = trInfo.getGtrs(g)[j];
sum+=(trs[j][i] / trInfo.L(m)) / normals[i];
}else{
sum+=trs[j][i];
}
}
if(doRPKM)sum=sum*10e9;
if(doLog)sum=log(sum);
outFile<<sum<<" ";
}
outFile<<endl;
}
outFile.close();
if(args.verbose)message("DONE\n");
return 0;
}
#ifndef BIOC_BUILD
int main(int argc,char* argv[]){
return getGeneExpression(&argc,argv);
}
#endif