-
Notifications
You must be signed in to change notification settings - Fork 0
/
make_qc.py
125 lines (98 loc) · 5.24 KB
/
make_qc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import clr
import os
import sys
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')
#sys.path.append("/usr/bin/mono") # Adjust this path to Mono's installation directory
#import pythonnet
#pythonnet.set_runtime("/usr/bin/mono")
# Set the base path to where the RawFileReader repository is cloned in the Docker container
base_path = '/RawFileReader'
# Update the paths to the DLL files based on the RawFileReader repository location in the Docker container
clr.AddReference(os.path.join(base_path, 'Libs/Net471/ThermoFisher.CommonCore.Data.dll'))
clr.AddReference(os.path.join(base_path, 'Libs/Net471/ThermoFisher.CommonCore.RawFileReader.dll'))
clr.AddReference(os.path.join(base_path, 'Libs/Net471/ThermoFisher.CommonCore.BackgroundSubtraction.dll'))
clr.AddReference(os.path.join(base_path, 'Libs/Net471/ThermoFisher.CommonCore.MassPrecisionEstimator.dll'))
# Continue with the rest of your script imports
from System import *
from System.Collections.Generic import *
from ThermoFisher.CommonCore.Data import ToleranceUnits, Extensions
from ThermoFisher.CommonCore.Data.Business import ChromatogramSignal, ChromatogramTraceSettings
from ThermoFisher.CommonCore.Data.Business import DataUnits, Device, GenericDataTypes, SampleType, Scan, TraceType
from ThermoFisher.CommonCore.Data.FilterEnums import IonizationModeType, MSOrderType
from ThermoFisher.CommonCore.Data.Interfaces import IChromatogramSettings, IScanEventBase, IScanFilter, RawFileClassification
from ThermoFisher.CommonCore.MassPrecisionEstimator import PrecisionEstimate
from ThermoFisher.CommonCore.RawFileReader import RawFileReaderAdapter
def ReadScanInformation(rawFile, firstScanNumber, lastScanNumber):
#print('extract data')
time_ms, current_ms, ms_type = [], [], []
#time_ms2, current_ms2 = [], []
for scanIndex, scan in enumerate(tqdm(range(firstScanNumber, lastScanNumber))):
time = rawFile.RetentionTimeFromScanNumber(scan)
scanFilter = IScanFilter(rawFile.GetFilterForScanNumber(scan))
# Common operations for both MS1 and MS2
scanStatistics = rawFile.GetScanStatsForScanNumber(scan)
segmentedScan = rawFile.GetSegmentedScanFromScanNumber(scan, scanStatistics)
total_current = np.sum(segmentedScan.Intensities)
time_ms.append(time)
current_ms.append(total_current)
ms_type.append(scanFilter.MSOrder)
#print('end extract data')
return time_ms, current_ms, ms_type
def plot_ms_cycle_time_and_total_current(df, out_path, instrument_name=None):
# Set up the plot layout
fig, axes = plt.subplots(figsize=(16, 8), ncols=2, nrows=2)
axes = axes.flatten()
# Filter the DataFrame for MS1 and MS2 data and calculate cycle times
ms1 = df[df['MS'] == 'Ms'].copy()
ms1.loc[:, 'cycle_time'] = ms1['RT'] - ms1['RT'].shift(1)
ms2 = df[df['MS'] == 'Ms2'].copy()
ms2.loc[:, 'cycle_time'] = ms2['RT'] - ms2['RT'].shift(1)
# Plot cycle time by RT_bin for MS1 and MS2
ms1.groupby('RT_bin')['cycle_time'].mean().reset_index().plot(kind='line', x='RT_bin', y='cycle_time', ax=axes[0], label='MS1')
ms2.groupby('RT_bin')['cycle_time'].mean().reset_index().plot(kind='line', x='RT_bin', y='cycle_time', ax=axes[1], label='MS2')
# Plot total current by RT_bin for MS1 and MS2
ms1.groupby('RT_bin')['total_current'].mean().reset_index().plot(kind='line', x='RT_bin', y='total_current', ax=axes[2], label='MS1')
ms2.groupby('RT_bin')['total_current'].mean().reset_index().plot(kind='line', x='RT_bin', y='total_current', ax=axes[3], label='MS2')
axes[0].set_ylabel('Cycle time (s)')
axes[1].set_ylabel('Cycle time (s)')
axes[2].set_ylabel('Total current')
axes[3].set_ylabel('Total current')
axes[0].set_xlabel('Retention Time (s)')
axes[1].set_xlabel('Retention Time (s)')
axes[2].set_xlabel('Retention Time (s)')
axes[3].set_xlabel('Retention Time (s)')
file_name = os.path.basename(out_path)
# Optionally print the instrument name if provided
if instrument_name:
plt.suptitle(f'QC {file_name} from {instrument_name} ')
plt.tight_layout()
plt.savefig(os.path.join(out_path+'.QC.png'))
if __name__ == '__main__':
# The first argument after the script name is the path to the RAW file
raw_file_path = sys.argv[1]
rawFile = RawFileReaderAdapter.FileFactory(raw_file_path)
rawFile.SelectInstrument(Device.MS, 1)
firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum
lastScanNumber = rawFile.RunHeaderEx.LastSpectrum
startTime = rawFile.RunHeaderEx.StartTime
endTime = rawFile.RunHeaderEx.EndTime
instrument_name = rawFile.GetInstrumentData().Name
file_name = os.path.basename(raw_file_path)
print(f'Analysing {file_name} file from {instrument_name}')
print(f'startTime {startTime} endTime: {endTime}')
out_list = ReadScanInformation(rawFile, firstScanNumber, lastScanNumber)
#build a dataframe with the data
df = pd.DataFrame()
df['RT']=out_list[0]
df['total_current']=out_list[1]
df['MS']=out_list[2]
df['MS']=df['MS'].astype(str)
df['RT_bin']=df['RT'].astype(int)
print('Make QC plot')
plot_ms_cycle_time_and_total_current(df, raw_file_path, instrument_name=instrument_name)
print('All done')