/
nanosims-processor.py
executable file
·363 lines (305 loc) · 17 KB
/
nanosims-processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#!/usr/bin/env python3
import argparse
import re
import pandas as pd
import imageio
import numpy as np
import sims
from scipy import ndimage
from skimage import measure, transform, morphology, segmentation, filters, color
import matplotlib
from matplotlib import colors
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
TK_SILENCE_DEPRECATION=1
parser = argparse.ArgumentParser(description='Tool for parsing Cameca NanoSIMS .im files to produce ratios. \
Basic usage is to read a .im file, and produce single-channel PNGs and/or ratios of \
isotope_1 / (isotope_1 + isotope_2). Also can read in a .png as a mask for ROIs.')
group1 = parser.add_argument_group('BASIC OPTIONS', '')
group2 = parser.add_argument_group('PLOTTING OPTIONS', '')
group3 = parser.add_argument_group('ROI OPTIONS', '')
group1.add_argument('-i', '--input', metavar='PATH', # required=True,
default='GB21_L2_NH4_light_chain1_1.im',
dest='input', help='File for input (a .im file)')
group1.add_argument('-o', '--output', metavar='PATH',
default=None, help='Prefix for output [default: --input without the .im ending]')
group1.add_argument('-c', '--compare1', metavar='"STRING"', default='15N 12C',
help='Focal element or trolley to be the numerator (use quotes if has spaces) [default: "15N 12C"]')
group1.add_argument('-C', '--compare2', metavar='"STRING"', default='14N 12C',
help='Comparand element or trolley to be summed with --compare1 for denominator \
[default: "14N 12C"]')
group1.add_argument('-f', '--frame', metavar='INT', type=int, default=None,
help='Frame index (e.g., "0" would be the first frame) to focus, \
default behavior is to loop through all [default: None]')
group1.add_argument('-F', '--filter_method', metavar='STRING', default=None,
help='Add a filter step, currently confined to one of "gaussian", "median", "tophat" [default: None]')
group1.add_argument('-s', '--sigma', metavar='FLOAT', type=float, default=1.0,
help='Parameter for filtration (sigma for gaussian, size for median) [default: 1.0]')
group2.add_argument('--rough', action='store_true',
help='Only rough plot, then exit [default: False]')
group2.add_argument('-w', '--whole_image', action='store_true',
help='Calculate ratio of specified channels for the full image (all pixels)')
group2.add_argument('-n', '--no_show', action='store_true',
help='Add this to not show plots to your screen, only save. Useful when batch processing')
group3.add_argument('-r', '--roi', metavar='PATH', default=None,
help='Path to corresponding manually drawn ROI file (mask) [default: None]')
group3.add_argument('--count_stat', metavar='STRING', default='mean',
help='Method for aggregating pixel values in each ROI to report in the summary table. \
Does not affect ratios. Current options: mean, sum [default: mean]')
group3.add_argument('-a', '--autoroi', default=False, action='store_true',
help='Auto-generate an ROI by attempting to segment the image with Otsu + watershed method. \
Only saves data in the absence of the --roi flag (to not overwrite user-specified ROIs).')
group3.add_argument('-M', '--min_area', default=100, metavar='INT',
help='Minimum area for --autoroi to recognize a region (to cut down on tiny bright specks/debris)')
group3.add_argument('--otsu_mult_high', default=1.2, metavar='FLOAT', type=float,
help='The --autoroi uses Otsu thresholding to determing "object" and "background" intensities, so \
this value is multiplied by the Otsu threshold to determine the UPPER bound')
group3.add_argument('--otsu_mult_low', default=0.5, metavar='FLOAT', type=float,
help='The --autoroi uses Otsu thresholding to determing "object" and "background" intensities, so \
this value is multiplied by the Otsu threshold to determine the LOWER bound')
args = parser.parse_args()
def apply_filter(img, filt_method, sigma):
if filt_method == 'gaussian':
for f in img.frame.values:
for specie in img.species.values:
img.loc[specie, f, :, :] = ndimage.gaussian_filter(img.loc[specie, f].data, sigma=sigma)
return img
if filt_method == 'median':
for f in img.frame.values:
for specie in img.species.values:
img.loc[specie, f, :, :] = ndimage.median_filter(img.loc[specie, f].data, size=int(sigma))
if filt_method == 'tophat':
for f in img.frame.values:
for specie in img.species.values:
footprint = morphology.disk(1)
res = morphology.white_tophat(img.loc[specie, f].data, footprint)
img.loc[specie, f, :, :] = img.loc[specie, f].data - res
return img
def rough_plot(img):
for f in img.frame.values:
for specie in img.species.values:
plt.cla()
plt.imshow(img.loc[specie, f], cmap='gray', interpolation='none')
plt.gray()
plt.axis('off')
plt.colorbar()
plt.title(specie)
plt.show()
def save_plot(img):
for f in img.frame.values:
for specie in img.species.values:
plt.cla()
plt.imshow(img.loc[specie, f], cmap='gray', interpolation='none')
plt.gray()
plt.axis('off')
plt.colorbar()
plt.title(specie)
plt.imsave(fname=args.output + "_f" + str(f) + "_" + specie + ".png",
arr=aligned_image.loc[specie, f], cmap='gray')
def get_image_from_raw_rois(roi, im):
if roi.shape[0:2] == im.data.shape[2:4]:
print("The ROI png has the same pixel dimension as the image, great! [no ROI cropping]")
elif roi.shape[0] > im.data.shape[2]:
print("ROI png dimensions are larger than the image, trying to correct...")
edges = np.concatenate([roi[[0, -1], 0, :], roi[[0, -1], 0, :]]).reshape((4, 3))
if np.all(edges == edges[0, :]): # check if edges are all the same RGB (likely border)
print("Edge pixels are all the same color, assuming this is background so trimming these solid edges")
not_outside = np.any(roi != edges[0, :], axis=-1)
roi = roi[not_outside, :]
width_roi = int(np.sqrt(roi.shape[0]))
width_image = im.data.shape[3]
roi = roi.reshape((width_roi, width_roi, 3))
# check if this fixed it
if roi.shape[0:2] == im.data.shape[2:4]:
print("Cropped successfully, no rescaling!")
else:
print("FYI... ROI was still too big @ "+str(width_roi)+"px vs. "+str(width_image)+"px so scaling")
roi = transform.resize(roi, (width_image, width_image, 3), anti_aliasing=True)*255
else:
print("Your ROI is smaller? Stopping so you can fix. Image: " + im.data.shape + " vs roi: " + roi.shape)
exit()
return roi
def parse_ROIs(objects, grp_col, c1, c2, annotated_im, im, stats):
for obj in range(objects[1]):
obj_x, obj_y = np.where(objects[0] == (obj + 1))
pts = np.where(objects[0] == (obj + 1))
pts = np.reshape(pts, (2, len(pts[0])))
pts = [pts[:, x] for x in range(pts.shape[1])]
counts1 = sum([c1.data[x, y] for x, y in pts])
counts2 = sum([c2.data[x, y] for x, y in pts])
if args.count_stat == 'mean':
all_counts = [x for x in np.array([im.data[:, x, y] for x, y in pts]).sum(axis=0)]
elif args.count_stat == 'sum':
all_counts = [x for x in np.array([im.data[:, x, y] for x, y in pts]).mean(axis=0)]
else:
print("Somehow there is no stat? fix the code so args.count_stat is not: " + args.count_stat)
all_counts = None
exit()
rat_im = counts1/(counts1 + counts2)
annotated_im[obj_x, obj_y] = rat_im
obj_stats = [grp_col, obj]
obj_stats.extend(all_counts)
obj_stats.append(rat_im)
stats.append(obj_stats)
return annotated_im, stats
def autosegment(full_img, low_mult=args.otsu_mult_low, high_mult=args.otsu_mult_high, too_small=args.min_area):
img = full_img.data[np.where(full_img.species.values == 'SE')[0][0], 0, :, :]
img = 255*(img/img.max())
thresh = filters.threshold_otsu(img)
markers = np.zeros_like(img)
markers[img < low_mult*thresh] = 1
markers[img > (high_mult*thresh)] = 2
# plt.imshow(markers, cmap='Greys_r', interpolation=None)
# plt.show()
# segment
label_image = segmentation.watershed(filters.sobel(img), markers) - 1
# drop segments with small area
label_image = morphology.area_opening(label_image, area_threshold=too_small)
auto_rois = measure.label(label_image, return_num=True)
if not args.no_show:
plt.clf()
plt.imshow(img, cmap='Greys_r', interpolation=None)
plt.imshow(color.label2rgb(label_image, bg_label=0), interpolation=None, alpha=0.3)
for reg in measure.regionprops(auto_rois[0]):
ry, rx = reg.centroid
plt.text(rx + 0, ry - 0, s=reg.label, ha='center', va='center', bbox=dict(fc='white', ec='none', pad=2))
plt.savefig(fname=args.output + "_auto_roi_watershed.png")
plt.show()
return auto_rois
##############
# BODY #
##############
image = sims.SIMS(args.input)
aligned_image, shifts = sims.utils.align(image) # account for inter-frame shifts
if args.compare1 not in aligned_image.species:
print("Your --compare1 value is not in the .im file? Did you put an underscore in? Available names:\n" +
", ".join(aligned_image.species.values))
exit()
if args.compare2 not in aligned_image.species:
print("Your --compare2 value is not in the .im file? Did you put an underscore in? Available names:\n" +
", ".join(aligned_image.species.values))
exit()
# remove 1px border on all sides because Cameca
aligned_image = aligned_image.drop_isel(x=[0, -1], y=[0, -1])
if args.frame is not None:
print('subsetting to frame ' + str(args.frame))
aligned_image = aligned_image.loc[:, [args.frame], :, :]
if args.filter_method:
aligned_image = apply_filter(img=aligned_image, filt_method=args.filter_method, sigma=args.sigma)
if args.output is None:
args.output = re.sub(".im", "", args.input)
if args.rough:
rough_plot(aligned_image)
exit()
# PREP STATS
stats_table = list()
annotated_image = np.zeros(aligned_image.data.shape[2:4])
# PARSE ROIS OR AUTOSEGMENT TO GENERATE ROIS
if args.autoroi:
auto_objects = autosegment(full_img=aligned_image)
if args.roi is not None:
rois = imageio.imread(args.roi, pilmode='RGB')
# hack becasue DPI is different between saved png and imported roi
rois = get_image_from_raw_rois(roi=rois, im=aligned_image)
# 3 groups of rois - red, green, blue
red_rois = rois[:, :, 0]-rois[:, :, 2]
red_rois = 1*(red_rois > 200)
green_rois = rois[:, :, 1]-rois[:, :, 2]
green_rois = 1*(green_rois > 200)
blue_rois = rois[:, :, 2]-rois[:, :, 1]
blue_rois = 1*(blue_rois > 200)
# segment by contiguous colors by category
red_objects = measure.label(red_rois, return_num=True)
green_objects = measure.label(green_rois, return_num=True)
blue_objects = measure.label(blue_rois, return_num=True)
# make a nice annotated image so rois are numbered to correspond to future stats_table
obj = np.where(red_objects[0] > 0, red_objects[0], 0) + \
np.where(green_objects[0] > 0, green_objects[0] + red_objects[1], 0) + \
np.where(blue_objects[0] > 0, blue_objects[0] + red_objects[1] + green_objects[1], 0)
cmap, norm = colors.from_levels_and_colors(levels=np.unique(obj), extend='max',
colors=np.hstack((np.repeat('black', 1),
np.repeat('red', red_objects[1]),
np.repeat('green', green_objects[1]),
np.repeat('blue', blue_objects[1]))))
plt.imshow(obj, cmap=cmap, norm=norm, interpolation='none')
for region in measure.regionprops(measure.label(red_rois)):
ry, rx = region.centroid
plt.text(rx, ry, s=region.label, ha='center', va='center', bbox=dict(fc='white', ec='none', pad=2))
for region in measure.regionprops(measure.label(blue_rois)):
ry, rx = region.centroid
plt.text(rx, ry, s=region.label, ha='center', va='center', bbox=dict(fc='white', ec='none', pad=2))
for region in measure.regionprops(measure.label(blue_rois)):
ry, rx = region.centroid
plt.text(rx, ry, s=region.label, ha='center', va='center', bbox=dict(fc='white', ec='none', pad=2))
plt.savefig(fname=args.output + "_roi_table.png")
if not args.no_show:
plt.show()
plt.cla()
elif not args.whole_image and not args.autoroi:
print("No ROI, saving pngs for each channel so you can use for making ROIs (or rerun with -r),\n \
OR use with --autoroi feature (running now so you can see what it looks like)")
save_plot(aligned_image)
auto_objects = autosegment(full_img=aligned_image)
# PROCESS ROIS COMPUTED ABOVE
for frame in range(aligned_image.data.shape[1]):
# calculate ratio of desired vs (desired + ref)
comp1 = aligned_image.loc[args.compare1, frame, :, :]
comp2 = aligned_image.loc[args.compare2, frame, :, :]
if args.whole_image:
ratio_image = comp1/(comp1 + comp2)
ratio_image = np.nan_to_num(ratio_image)
plt.cla()
plt.imshow(ratio_image, interpolation='none', cmap='cividis')
plt.axis('off')
plt.colorbar().set_label("Fraction: " + sims.utils.format_species(args.compare1) + " / (" +
sims.utils.format_species(args.compare1)+" + " +
sims.utils.format_species(args.compare2) + ")")
plt.title(sims.utils.format_species(args.compare1) + " / (" + sims.utils.format_species(args.compare1) + " + " +
sims.utils.format_species(args.compare2) + ")")
plt.savefig(fname=args.output + "_whole_f" + str(frame) +
"_ratio" + re.sub(" ", "_", args.compare1) + "-x-" + re.sub(" ", "_", args.compare2) + ".png")
if not args.no_show:
plt.show()
if not (args.roi or args.autoroi):
continue
if args.roi:
try:
annotated_image, stats_table = parse_ROIs(objects=red_objects, grp_col='red', c1=comp1, c2=comp2,
im=aligned_image.loc[:, frame, :, :],
annotated_im=annotated_image, stats=stats_table)
except (TypeError, NameError):
print("No red, continuing")
try:
annotated_image, stats_table = parse_ROIs(objects=green_objects, grp_col='green', c1=comp1, c2=comp2,
im=aligned_image.loc[:, frame, :, :],
annotated_im=annotated_image, stats=stats_table)
except (TypeError, NameError):
print("No green, continuing")
try:
annotated_image, stats_table = parse_ROIs(objects=blue_objects, grp_col='blue', c1=comp1, c2=comp2,
im=aligned_image.loc[:, frame, :, :],
annotated_im=annotated_image, stats=stats_table)
except (TypeError, NameError):
print("No blue huh?")
elif args.autoroi or not args.whole_image:
annotated_image, stats_table = parse_ROIs(objects=auto_objects, grp_col='auto', c1=comp1, c2=comp2,
im=aligned_image.loc[:, frame, :, :],
annotated_im=annotated_image, stats=stats_table)
stats_columns = ["Group", "ROI"]
stats_columns.extend(aligned_image.species.values)
stats_columns.append("Ratio_" + args.compare1 + "x" + args.compare2)
stats_columns = [re.sub(" ", "_", x) for x in stats_columns]
stats_table = pd.DataFrame(stats_table, columns=stats_columns)
stats_table.to_csv(args.output + "_f0" +
"_ratio" + re.sub(" ", "_", args.compare1) + "-x-" + re.sub(" ", "_", args.compare2) +
".tsv", sep="\t", index=False)
plt.clf()
plt.imshow(annotated_image, interpolation='none', cmap='gray')
plt.axis('off')
plt.colorbar().set_label("Fraction (per ROI)")
plt.title(sims.utils.format_species(args.compare1) + " / (" + sims.utils.format_species(args.compare1)+" + " +
sims.utils.format_species(args.compare2) + ")")
plt.savefig(fname=args.output + "_f" + str(frame) +
"_ratio" + re.sub(" ", "_", args.compare1) + "-x-" + re.sub(" ", "_", args.compare2) + ".png")
if not args.no_show:
plt.show()