forked from lfnothias/IODA_MS
/
IODA_run_TOPPAS_exclusion.py
147 lines (124 loc) · 5.78 KB
/
IODA_run_TOPPAS_exclusion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Author: Louis Felix Nothias, louisfelix.nothias@gmail.com, June 2020
import datetime
import os
import shutil
import subprocess
import sys
import zipfile
from datetime import date

from logzero import logger, logfile
from xvfbwrapper import Xvfb

from IODA_exclusion_workflow import get_all_file_paths
def _fetch_input_mzML(input_mzML, destination):
    """Download (http/https) or copy (local path) the input mzML to *destination*.

    Commands are run with an argument list rather than a shell string so that
    URLs containing '&' or '?' and paths containing spaces are passed verbatim.

    Raises:
        subprocess.CalledProcessError: if wget exits with a non-zero status.
        OSError: if a local input file cannot be copied.
    """
    if input_mzML.startswith('http'):
        if 'google' in input_mzML:
            logger.info('This is the Google Drive download link:'+str(input_mzML))
            # The file id is taken as the sixth '/'-separated field of the
            # link (presumably https://drive.google.com/file/d/<id>/... --
            # confirm against the links users actually paste).
            url_id = input_mzML.split('/', 10)[5]
            prefixe_google_download = 'https://drive.google.com/uc?export=download&id='
            input_mzML = prefixe_google_download+url_id
        else:
            logger.info('This is the input file path: '+str(input_mzML))
        subprocess.run(["wget", "-r", input_mzML, "-O", destination], check=True)
    else:
        logger.info('This is the input file path: '+str(input_mzML))
        # Copying a file onto itself is an error for shutil; nothing to do then.
        if os.path.abspath(input_mzML) != os.path.abspath(destination):
            shutil.copy(input_mzML, destination)


def _set_workflow_parameters(toppas_path, ppm_error, narrow_noise_threshold, large_noise_threshold):
    """Overwrite the noise-threshold and ppm-error lines of the .toppas file.

    The parameters are located by fixed (0-based) line index in the workflow
    file: 37/42 for the large-feature settings and 95/100 for the
    narrow-feature settings.

    Raises:
        IndexError: if the workflow file has fewer lines than expected.
    """
    with open(toppas_path, "r") as workflow_file:
        list_of_lines = workflow_file.readlines()

    # Templates for the lines to substitute; a newline is appended on write so
    # the replaced line does not merge with the one that follows it.
    noise_line = ''' <ITEM name="noise_threshold_int" value="NOISE" type="double" description="Intensity threshold below which peaks are regarded as noise." required="false" advanced="false" />'''
    ppm_line = ''' <ITEM name="mass_error_ppm" value="PPM_ERROR" type="double" description="Allowed mass deviation (in ppm)." required="false" advanced="false" />'''

    # Noise level for large, then narrow features
    list_of_lines[37] = noise_line.replace('NOISE',str(large_noise_threshold)) + "\n"
    list_of_lines[95] = noise_line.replace('NOISE',str(narrow_noise_threshold)) + "\n"
    # ppm error for large, then narrow features
    list_of_lines[42] = ppm_line.replace('PPM_ERROR',str(ppm_error)) + "\n"
    list_of_lines[100] = ppm_line.replace('PPM_ERROR',str(ppm_error)) + "\n"

    with open(toppas_path, "w") as workflow_file:
        workflow_file.writelines(list_of_lines)


def IODA_exclusion_workflow(input_mzML,ppm_error,narrow_noise_threshold,large_noise_threshold):
    """Run the TOPPAS/OpenMS exclusion workflow on one mzML file and zip the results.

    Steps: clear previous outputs, fetch the mzML into ``input.mzML`` and
    ``TOPPAS_Workflow/toppas_input/Blank.mzML``, download the .toppas workflow
    and write the requested parameters into it, run ``ExecutePipeline`` under a
    virtual X display, then zip ``TOPPAS_Workflow/`` into
    ``download_results/IODA_OpenMS_results.zip``.

    Args:
        input_mzML: http(s) URL (Google Drive sharing links are rewritten to a
            direct-download URL) or a local file path.
        ppm_error: mass error in ppm written into the workflow file.
        narrow_noise_threshold: noise intensity threshold for narrow features.
        large_noise_threshold: noise intensity threshold for large features.

    Raises:
        subprocess.CalledProcessError: if a wget download fails.
        OSError: if a local copy fails.
        IndexError: if the downloaded workflow file is shorter than expected.
    """
    TOPPAS_Pipeline = "toppas_Exclusion_workflow.toppas"
    TOPPAS_output_folder = "toppas_output"
    TOPPAS_folder = "TOPPAS_Workflow"
    toppas_path = os.path.join(TOPPAS_folder, TOPPAS_Pipeline)
    toppas_input_folder = os.path.join(TOPPAS_folder, "toppas_input")

    # Remove leftovers from a previous run; their absence is not an error.
    try:
        os.remove('download_results/IODA_OpenMS_results.zip')
    except FileNotFoundError:
        pass
    shutil.rmtree('TOPPAS_Workflow/toppas_output/TOPPAS_out/', ignore_errors=True)
    os.makedirs('download_results', exist_ok=True)
    # The log file lives in results/, which must exist before logfile() opens it.
    os.makedirs('results', exist_ok=True)
    os.makedirs(toppas_input_folder, exist_ok=True)

    today = str(date.today())
    now = datetime.datetime.now()
    # Attach the log file first so the start timestamp is recorded in it too.
    logfile('results/logfile_IODA_from_mzML_'+str(today)+'.txt')
    logger.info(now)

    print('======')
    print('Starting the IODA-Exclusion workflow from a mzML file')
    print('======')
    print('Getting the mzML, please wait ...')
    logger.info('This is the input: '+input_mzML)
    _fetch_input_mzML(input_mzML, "input.mzML")
    shutil.copy("input.mzML", os.path.join(toppas_input_folder, "Blank.mzML"))

    print('======')
    print('Copying the mzML to the TOPPAS/OpenMS input folder')
    print('======')
    print('Changing variables of the TOPPAS/OpenMS workflow')
    logger.info(' ppm error = '+str(ppm_error))
    logger.info(' narrow peak/feature noise threshold = '+str(narrow_noise_threshold))
    logger.info(' large peak/feature noise_threshold = '+str(large_noise_threshold))

    # Always start from a fresh copy of the workflow so the fixed line indices
    # used for parameter substitution refer to the pristine file.
    subprocess.run(
        ["wget",
         "https://github.com/lfnothias/IODA_MS/raw/test2/TOPPAS_Workflow/toppas_Exclusion_workflow.toppas",
         "-O", toppas_path],
        check=True,
    )

    # Check format for variable: warn (without aborting) on non-numeric input.
    noise_message = "== The noise level must be a float or an integer, such as 6.0e05 =="
    ppm_message = "== The ppm error must be a float or an integer, such as 10 ppm =="
    for value, message in (
        (large_noise_threshold, noise_message),
        (narrow_noise_threshold, noise_message),
        (ppm_error, ppm_message),
    ):
        try:
            float(value)
        except ValueError:
            print(message)

    _set_workflow_parameters(toppas_path, ppm_error, narrow_noise_threshold, large_noise_threshold)

    print('======')
    print('Initializing the TOPPAS/OpenMS workflow')
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        print('======')
        print('Running the TOPPAS/OpenMS workflow, this could take several minutes, please wait ...')
        completed = subprocess.run(
            ["/openms-build/bin/ExecutePipeline",
             "-in", TOPPAS_Pipeline,
             "-out_dir", TOPPAS_output_folder],
            cwd=TOPPAS_folder,
        )
    finally:
        # Stop the virtual display even if launching the pipeline failed.
        vdisplay.stop()
    if completed.returncode != 0:
        # Keep going so the zipped folder can still be inspected by the user.
        logger.error('ExecutePipeline exited with status '+str(completed.returncode))

    print('======')
    print('Completed the TOPPAS/OpenMS workflow')
    print('======')
    print('Zipping up the TOPPAS/OpenMS workflow files')
    get_all_file_paths('TOPPAS_Workflow/','download_results/IODA_OpenMS_results.zip')

    print('======')
    print('Completed zipping up the TOPPAS/OpenMS workflow output files')
    print('======')
    print('You can continue the rest of the IODA workflow')
if __name__ == "__main__":
    # Command-line entry point. The original called IODA_run_TOPPAS_exclusion,
    # a name that is not defined in this file (it is the file's own name), so
    # the script always failed with NameError.
    if len(sys.argv) != 5:
        sys.exit("Usage: "+sys.argv[0]+" <input_mzML> <ppm_error> <narrow_noise_threshold> <large_noise_threshold>")
    IODA_exclusion_workflow(str(sys.argv[1]),float(sys.argv[2]),float(sys.argv[3]),float(sys.argv[4]))