/
data-reduction-v4.py
102 lines (90 loc) · 2.78 KB
/
data-reduction-v4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python
import csv, math, sys, re
###
# load data up
###
def loadData(f):
# define a new array to read stuff into
data = [["Measurement", "Value (V)", "Internal precision (1se)"]]
# I only actually want data from the second block of running totals
# TODO: does this need to be flexible for other element runs?
i = 0
for line in f:
if i < 2:
if "Running total results" in line:
i += 1
continue
else:
if not line.strip(): continue
try:
if line[8] != ' ':
data.append( [ line[8:37], line[37:52], line[52:64] ])
except:
continue
return data
# I want to automatically stick the sample name in, too.
def identifySample(f):
for line in f:
if "Sample :" in line:
temp = re.split(r':', line)
sampleName = str(re.search(r'\b..*\b', temp[1]).group(0))
return sampleName
###
# transpose the table
###
def transposeData(runNo, data, sampleName):
transposed = []
# this ultimately wants to be user-controlled - scrape off the row titles,
# present as a drop-down list, and let the user choose which to include in
# the reduced data table, with tickyboxes for whether to include internal
# precision.
headers = ['file', 'sample']
data_out = [runNo, sampleName]
for data_index in [
1,
2,
7,
12,
13,
16,
17,
18,
19,
20,
21,
22,
23,
25,
26,
]:
try:
headers.append(data[data_index][0])
headers.append('internal precision')
data_out.append(float(data[data_index][1]))
data_out.append(float(data[data_index][2]))
except:
continue
return [headers, data_out] # our two rows
if __name__ == '__main__':
if len(sys.argv) < 2:
print >> sys.stderr, "Usage: %s file.csv [file2.csv ...]" % sys.argv[0]
sys.exit(1)
files = sys.argv[1:]
###
# actually do the thing (damn, I'm good at comments)
###
for filename in files:
# pull run number out of filename
runNo = re.search(r'[0-9][0-9]*', filename).group(0)
# do I seriously have to do this in a separate operation because magic?
with open(filename, 'r') as f:
sampleName = identifySample(f)
# dump final running totals for the file into a table
with open(filename, 'r') as f:
data = loadData(f)
transposed = transposeData(runNo, data, sampleName)
# transpose 'em
with open('transposeddata.csv', 'a') as f:
csvwriter = csv.writer(f)
for row in transposed[1:] :
csvwriter.writerow(row)