-
Notifications
You must be signed in to change notification settings - Fork 2
/
preprocessing_main.py
816 lines (659 loc) · 39.4 KB
/
preprocessing_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 23 10:45:10 2022
@author: Roger Amundsen
"""
#pip install pyvips==2.1.12
import os
#print('path =', os.getenv('PATH
# change this for your install location and vips version, and remember to
# use double backslashes
vipshome = 'C:\\vips-dev-8.10\\bin'
# set PATH
os.environ['PATH'] = vipshome + ';' + os.environ['PATH']
string = os.environ['PATH']
new_string=string.split(';')[0]
a=string.split(';')[0]
for path in string.split(';')[1:]:
if a != path:
new_string=new_string+';'+path
a = path
os.environ['PATH']=new_string
#print(os.environ['PATH'])
# and now pyvips will pick up the DLLs in the vips area
import pyvips
from skimage import morphology
from scipy import ndimage
import numpy as np
import pickle
import time
import cv2
import matplotlib.pyplot as plt
import math
from skimage.measure import label, regionprops
#from skimage.filters import threshold_otsu
from skimage.morphology import closing, square
import xml.etree.ElementTree as ET
def find_regions(mask, threshold, remove=False):
'''Finds individual regions in a mask. These regions are not connected to eachother.
input:
mask: e.g. lesion benign mask, normal tissue mask etc.
remove: If true remove regions that have area smaller than threshold;
threshold: min_size of a region. If remove is true, remove regions that are smaller than threshold
return:
labels: each region is labeled with a different integer'''
# Use a boolean condition to find where pixel values are > 0.75
blobs = mask > 0.75
labels, regions_found_in_wsi_before_removing_small_obj = ndimage.label(blobs)
print('\tFound {} regions'.format(regions_found_in_wsi_before_removing_small_obj))
if remove:
# Remove all the small regions
labels = morphology.remove_small_objects(labels, min_size=threshold)
return labels
def box_around_region(region):
'''creates a square box around a region/polygon. This box will be used to scan for valid
tiles instead of having to scan the whole WSI
input:
region: skimage region coordinates
output:
coorner coordinates of the box'''
y_box_min = min(region.coords[:,0])
x_box_min = min(region.coords[:,1])
y_box_max = max(region.coords[:,0])
x_box_max = max(region.coords[:,1])
return [y_box_min, y_box_max, x_box_min, x_box_max]
def mask_overlay(dict_with_masks, alpha_chan):
'''Draw a mask overlay on top of the output image'''
for tissue in dict_with_masks:
if tissue == 'lesion benign':
dict_with_masks[tissue] = cv2.merge((dict_with_masks[tissue], alpha_chan, alpha_chan, alpha_chan))
elif tissue == 'lesion malignant':
dict_with_masks[tissue] = cv2.merge((alpha_chan, alpha_chan, dict_with_masks[tissue], alpha_chan))
elif tissue == 'normal tissue':
dict_with_masks[tissue] = cv2.merge((alpha_chan, dict_with_masks[tissue], alpha_chan, alpha_chan))
else:
dict_with_masks[tissue] = cv2.merge((alpha_chan, alpha_chan, alpha_chan, dict_with_masks[tissue]))
return dict_with_masks
def tile_variance(pyvips_img, threshold):
'''Find images with low variance: background images
input:
pyvips_img: pyvips object representation of an WSI image patch
threshold: minimum variance a patch can have before it is disregarded
return:
False if patch is rejected or True if patch is approved'''
vips2np = np.ndarray(buffer=pyvips_img.write_to_memory(), dtype='uint8', shape=[pyvips_img.height, pyvips_img.width, pyvips_img.bands])
r, g, b = vips2np[:,:,0], vips2np[:,:,1], vips2np[:,:,2]
r_mean, g_mean, b_mean = np.mean(r), np.mean(g), np.mean(b)
std = np.std(np.array([r_mean, g_mean, b_mean]))
mean_of_mean = np.mean(np.array([r_mean,g_mean,b_mean]))
#if mean < threshold:
# plt.subplot(2,2,1)
# plt.imshow(vips2np)
# plt.subplot(2,2,2)
# plt.imshow(vips2np_hsv[:,:,0])
# plt.subplot(2,2,3)
# plt.imshow(vips2np_hsv[:,:,1])
# plt.subplot(2,2,4)
# plt.imshow(vips2np_hsv[:,:,2])
# plt.show()
return False if mean_of_mean > threshold and std < 4.5 else True
class Preprocess:
def __init__(self,
tissue_classes_to_fit_tiles_on = ['normal tissue', 'lesion benign', 'lesion malignant'],
mask_hierarchy = ['normal tissue', 'lesion benign', 'lesion malignant'],
phi =0.7,
alpha = '40x',
tau = '2.5x',
tiles_to_show = ['40x'],
tile_size = 256,
save_tiles_as_jpeg=False,
save_binary_annotation_mask=True,
remove_small_regions = False,
small_region_remove_threshold = 1000,
wsi_dataset_file_path = 'WSIs/',
wsi_path = 'WSI_all/',
output_folder = 'Output/',
coordinate_path = 'coordinates/',
extracted_tiles_folder = 'Extracted_tiles/',
skip_list = ['SUShud24 - 2021-11-17 10.53.41.ndpi',
'SUShud42 - 2021-12-06 11.24.59.ndpi',
'SUShud47 - 2021-12-06 11.33.10.ndpi',
'SUShud53 - 2021-12-06 11.53.26.ndpi',
'SUShud77 - 2021-12-06 13.52.35.ndpi',
'SUShud92 - 2021-12-07 16.12.10.ndpi',
'SUShud93 - 2021-12-07 16.23.26.ndpi',
'SUShud95 - 2021-12-07 16.35.31.ndpi',
'Master benign-malign diagnose.xlsx'],
magnification_levels = {0: '40x', 1: '20x', 2: '10x', 3: '5x',
4: '2.5x', 5: '1.25x', 6: '0.625x'},
magnification_levels_to_save = ['40x','20x','10x', '2.5x'],
wsi_name = None
):
self.tissue_classes_to_fit_tiles_on = tissue_classes_to_fit_tiles_on
self.mask_hierarchy = mask_hierarchy
self.phi = phi
self.alpha = alpha
self.tau = tau
self.tiles_to_show = tiles_to_show
self.tile_size = tile_size
self.save_tiles_as_jpeg = save_tiles_as_jpeg
self.save_binary_annotation_mask = save_binary_annotation_mask
self.remove_small_regions = remove_small_regions
self.small_region_remove_threshold = small_region_remove_threshold
self.wsi_dataset_file_path = wsi_dataset_file_path
self.wsi_path = wsi_path
self.output_folder = output_folder
self.coordinate_path = coordinate_path
self.extracted_tiles_folder = extracted_tiles_folder
self.skip_list = skip_list
self.magnification_levels = magnification_levels
self.magnification_levels_to_save = magnification_levels_to_save
self.wsi_name = wsi_name
def make_directories(self):
'''Create directories for preprocessing. Folders to save extracted tiles, masks, and overview image with tiles on'''
os.makedirs(self.wsi_dataset_file_path, exist_ok=True)
os.makedirs(self.output_folder, exist_ok=True)
os.makedirs(self.coordinate_path, exist_ok=True)
for wsi_name in os.listdir(self.wsi_path):
if wsi_name not in self.skip_list:
os.makedirs(self.wsi_dataset_file_path+wsi_name[:-5], exist_ok=True)
if self.save_tiles_as_jpeg:
os.makedirs(self.extracted_tiles_folder, exist_ok=True)
def load_wsi(self, levels=list(range(7))):
'''Load pyvips wsi
Input:
path_name: path and file name
magnification_level_names: Dictionary containing the wsi levels as keys and their names as values,
eg: {0: '40x', 1: '20x', 2: '10x', 3: '5x'}.
levels: magnification level/levels to load, list or single integer.
Return:
image_dictionary: dictionary with magnification levels as keys and pyvips objects as values
area_ratio_dictionary: dictionary with magnification levels as keys and area ratios as values
'''
if isinstance(levels, int):
levels = [levels]
self.image_dictionary={}
for i in levels:
name = self.magnification_levels[i]
self.image_dictionary[name] = pyvips.Image.new_from_file(self.wsi_path+self.wsi_name, level=i, autocrop=True).flatten()
highest_magnification_name = self.magnification_levels[min(levels)]
highest_magnification_area = self.image_dictionary[highest_magnification_name].width*self.image_dictionary[highest_magnification_name].height
self.area_ratio_dictionary = {}
for magnification, image in self.image_dictionary.items():
image_area = image.width*image.height
area_ratio = highest_magnification_area/image_area
self.area_ratio_dictionary[magnification]= area_ratio
def load_mask_dict(self):
try:
with open(self.wsi_dataset_file_path+self.wsi_name[:-5]+'/masks.obj', 'rb') as f:
dict_ = pickle.load(f)
self.mask_dict = {}
for requested_class in self.tissue_classes_to_fit_tiles_on:
if requested_class in dict_:
self.mask_dict[requested_class] = dict_[requested_class]
except:
self.mask_dict = None
def remove_mask_overlap(self):
'''remove from mask if it is overlapping with another and is below it in hierarchy'''
for class1 in [i for i in self.mask_hierarchy if i in self.mask_dict]:
class1_mask = self.mask_dict[class1]
#print(class1, np.unique(class1_mask))
for class2 in self.mask_hierarchy:
if class1!=class2:
class2_mask = self.mask_dict[class2]
sum_masks = class1_mask+class2_mask
#print(class1, class2)
#print(np.unique(sum_masks))
if len(np.where(sum_masks == 2)[0])>0:
hierarchy_class1 = self.mask_hierarchy.index(class1)
hierarchy_class2 = self.mask_hierarchy.index(class2)
if hierarchy_class1< hierarchy_class2:
print('removing overlap between {} and {} from {}'.format(class1,class2,class2))
class2_mask[sum_masks==2]=0
else:
class1_mask[sum_masks==2]=0
print('removing overlap between {} and {} from {}'.format(class1,class2,class1))
def dynamic_TAU_size(self, threshold):
'''Reduces tau to a lower magnification level if the number of pixels in the image is higher than a given threshold
input:
threshold: the max number of pixels'''
highest_magnification = self.magnification_levels[min(self.magnification_levels.keys())]
height = self.image_dictionary[highest_magnification].height
width = self.image_dictionary[highest_magnification].width
if height*width > threshold:
self.tau = '1.25x'
else:
self.tau = '2.5x'
def image_pyramid_measurements(self):
'''calculates ratios and sizes of the images in the wsi_pyramid'''
# Ratio between alpha and tau
self.ratio_alpha_tau = self.area_ratio_dictionary[self.alpha]/self.area_ratio_dictionary[self.tau]
#Define the tile size of alpha on the tau image
self.tile_size_alpha_on_tau = int(np.sqrt(self.ratio_alpha_tau)*self.tile_size)
#create a subset of levels that are not tau
self.subset_of_levels = []
for magnification in self.magnification_levels_to_save:
if magnification != self.tau:
self.subset_of_levels.append(magnification)
# Find width/heigh of tau image
self.tau_image_width = self.image_dictionary[self.tau].width
self.tau_image_height = self.image_dictionary[self.tau].height
def patch_criteria(self):
'''calculate pathc criteria: the amount of pixels within a patch that has to belong to a mask (have the value 1 or 255) for the patch to be valid'''
self.criteria = pow(self.tile_size,2)*self.ratio_alpha_tau * self.phi
def possible_x_and_y_coordinates(self, min_max_coordinates):
'''returns a list of all coordinates that has the potential to become a tile
input:
min_max_coorinates: the coordinates of a box around the region/polygon/ROI
ratio: the ratio between the alpha layer and tau layer
tile size: tile size to create a pad around the box
image_shape: to make sure the range of coordinate does not extend outside of the image
return:
top left corner coorinates of potential tiles'''
#tile_size_alpha_on_tau = int(tile_size*ratio) #find tile size at tau
y_min, y_max = min_max_coordinates[0], min_max_coordinates[1]
x_min, x_max = min_max_coordinates[2], min_max_coordinates[3]
pad = int(self.tile_size/2)
range_x = range(max(x_min-pad,0), min((x_max+pad)-self.tile_size,self.tau_image_width-self.tile_size), self.tile_size_alpha_on_tau)
range_y = range(max(y_min-pad,0), min((y_max+pad)-self.tile_size,self.tau_image_height-self.tile_size), self.tile_size_alpha_on_tau)
return (list(range_x), list(range_y))
def find_valid_tiles(self, all_x_all_y_pos, mask):
'''Finds valid tiles from alpha level by checking that the tile area contains enough mask area within it.
input:
criteria: the minimum ratio of mask area within the tile area.
tile_size: tile size aka patch size
tile_size_alpha_on_tau: the ratio between the area of a tile at alpha level and tau level
return:
list_of_valid_tiles_from_current_class: list of top left coordinates of valid tiles in the current class/tissue-type'''
all_x, all_y = all_x_all_y_pos
list_of_valid_tiles_from_current_class = []
for y_pos in all_y:
y_center = y_pos + int(self.tile_size/2)
y_top = y_center - int(self.tile_size_alpha_on_tau/2)
y_base = y_center + int(self.tile_size_alpha_on_tau/2)
for x_pos in all_x:
x_center = x_pos + int(self.tile_size/2)
x_left = x_center - int(self.tile_size_alpha_on_tau/2)
x_right = x_center + int(self.tile_size_alpha_on_tau/2)
# Equation 1 in paper
if int(sum(sum(mask[y_top:y_base,
x_left:x_right]))) >= self.criteria:
list_of_valid_tiles_from_current_class.append((x_pos, y_pos))
return list_of_valid_tiles_from_current_class
def create_coordinate_dictionary(self,
dict_of_all_predicted_coordinates,
list_of_valid_tiles,
current_class,
remove_tiles=True,
threshold=150):
'''creates a dictionary containing the coordinates of valid tiles for each magnification level for each WSI
input:
valid_tiles_from_current_wsi: list of valid tiles given by their top left x- and y-coordinates [(x1,y1),(x2,y2)...]
'''
idx_of_valid_tiles= list(range(len(list_of_valid_tiles)))
start_index = len(dict_of_all_predicted_coordinates.keys())
id_number = start_index
#for idx, current_xy_pos in enumerate(list_of_valid_tiles[start_index:]):
for idx, current_xy_pos in enumerate(list_of_valid_tiles):
approved_tile = True
tile_x_tau = current_xy_pos[0]
tile_y_tau = current_xy_pos[1]
if remove_tiles:
ratio_tau_alpha = self.area_ratio_dictionary[self.tau]/self.area_ratio_dictionary[self.alpha]
tile_x_alpha = (tile_x_tau+(self.tile_size/2))*np.sqrt(ratio_tau_alpha)-self.tile_size/2
tile_y_alpha = (tile_y_tau+(self.tile_size/2))*np.sqrt(ratio_tau_alpha)-self.tile_size/2
tile_to_save = self.image_dictionary[self.alpha].extract_area(int(tile_x_alpha),int(tile_y_alpha),self.tile_size, self.tile_size)
approved_tile = tile_variance(tile_to_save, threshold)
if approved_tile:
dict_of_all_predicted_coordinates[id_number] = dict()
dict_of_all_predicted_coordinates[id_number]['path'] = self.wsi_path+self.wsi_name
dict_of_all_predicted_coordinates[id_number]['tissue_type'] = current_class
# Equation 2 in paper.
for beta in self.magnification_levels_to_save:
ratio_tau_beta = self.area_ratio_dictionary[self.tau]/self.area_ratio_dictionary[beta]
tile_x = (tile_x_tau+(self.tile_size/2))*np.sqrt(ratio_tau_beta)-self.tile_size/2
tile_y = (tile_y_tau+(self.tile_size/2))*np.sqrt(ratio_tau_beta)-self.tile_size/2
dict_of_all_predicted_coordinates[id_number][str(beta)] = (int(tile_x), int(tile_y))
if self.save_tiles_as_jpeg:
tile_to_save = self.image_dictionary[beta].extract_area(int(tile_x),int(tile_y),self.tile_size, self.tile_size)
tile_to_save.jpegsave(self.extracted_tiles_folder + self.wsi_name[:-5] + '/tile_{}_{}_{}_{}.jpeg'.format(id_number,beta, int(tile_x), int(tile_y)), Q=100)
id_number += 1
else:
idx_of_valid_tiles.remove(idx)
list_of_valid_tiles = [list_of_valid_tiles[i] for i in idx_of_valid_tiles]
return dict_of_all_predicted_coordinates, list_of_valid_tiles
def save_coordinates(self, coordinates):
with open(self.coordinate_path+self.wsi_name[:-5]+'_tile_size_'+str(self.tile_size)+'.obj', 'wb') as handle:
pickle.dump(coordinates, handle, protocol=pickle.HIGHEST_PROTOCOL)
def draw_tiles_on_image(self, image, valid_tiles):
'''Draws the valid tiles on the output image
input:
image
valid_tiles: list of top-left coordinates of the valid tiles at TAU level
'''
for current_xy_pos in valid_tiles:
# Equation 3 in paper.
for beta in self.tiles_to_show:
ratio_beta_tau = self.area_ratio_dictionary[beta]/self.area_ratio_dictionary[self.tau]
center_x_tau = current_xy_pos[0]+(self.tile_size/2)
center_y_tau = current_xy_pos[1]+(self.tile_size/2)
start_x = int(center_x_tau-(self.tile_size/2)*np.sqrt(ratio_beta_tau))
start_y = int(center_y_tau-(self.tile_size/2)*np.sqrt(ratio_beta_tau))
end_x = int(start_x + self.tile_size*np.sqrt(ratio_beta_tau))
end_y = int(start_y + self.tile_size*np.sqrt(ratio_beta_tau))
color = (0, 0, 255) if beta == self.alpha else (0, 255, 0)
cv2.rectangle(image, (start_x, start_y), (end_x, end_y), color, 1)
def save_overview_image(self, image, num_tiles):
# Save overview image
cv2.imwrite(self.output_folder + self.wsi_name[:-5] + '/image_with_mask_and_tiles_alpha_{}_phi_{}_{}.jpg'.format(self.alpha, self.phi, num_tiles), image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
class Foreground_background_segmentation(Preprocess):
def __init__(self,
level=6,
hue_min=100,
hue_max=180,
size2remove=100):
self.level = level
self.hue_min = hue_min
self.hue_max = hue_max
self.size2remove = size2remove
super().__init__()
def remove_small_regions_from_mask(self, mask):
'''closes holes in mask and removes regions that are smaller than a threshold named size2remove.
input:
mask
return:
closed mask'''
# Closing of the image and gather the labels of the image
mask = closing(mask, square(3))
label_image = label(mask)
# Run through the image labels and set the small regions to zero
props = regionprops(label_image)
for region in props:
if region.area < self.size2remove:
minY, minX, maxY, maxX = region.bbox
mask[minY:maxY, minX:maxX] = 0
return mask
def create_tissue_mask(self, wsi_pyramid, display_mask=False):
'''creates a tissue mask there tissue has the value 1 and background 0, based on thresholding.
input:
wsi_pyramid: a dictionary containing the WSI image on all maginification levels (in pyvips object format).
return:
tissue_mask'''
magnification = self.magnification_levels[self.level]
img = wsi_pyramid[magnification]
img = np.ndarray(buffer=img.write_to_memory(), dtype=np.uint8, shape=[img.height, img.width, img.bands])
img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
mask_HSV = cv2.inRange(img_hsv, (self.hue_min, 0, 0), (self.hue_max, 255, 255))
mask_HSV = self.remove_small_regions_from_mask(mask_HSV)
# Close the holes in the image
maskInv = cv2.bitwise_not(mask_HSV)
maskInv_closed = self.remove_small_regions_from_mask(maskInv)
mask_HSV = cv2.bitwise_not(maskInv_closed)
mask_HSV[mask_HSV>0]=1
self.mask_tissue = mask_HSV
if display_mask:
plt.imshow(self.mask_tissue)
plt.show()
def write_to_pickle(path, name, dictionary):
if name in os.listdir(path):
with open(path+name, 'rb') as f:
dict_ = pickle.load(f)
for key in dictionary:
dict_[key] = dictionary[key]
with open(path+name, 'wb') as f:
pickle.dump(dict_, f)
else:
with open(path+name, 'wb') as f:
pickle.dump(dictionary, f)
class Mask_from_annotations(Preprocess):
def __init__(self,
xml_path = 'xml/',
tags = {'lesion benign': ['lesion benign'],
'lesion malignant': ['lesion malign'],
'all malignant': ['ulceration', 'lesion malign', 'in situ', 'necrosis', 'til', 'mitosis'],
'normal tissue': ['normal epidermis','normal adnexal structure','normal adipose tissue'],
'ulceration': ['ulceration'],
'normal epidermis': ['normal epidermis']}):
self.tags = tags
self.xml_path = xml_path
self.xml_files = [file for file in os.listdir(self.xml_path) if 'MSc_benign' or 'SUShud51' in file]
super().__init__()
def extract_polygons(self, xml_file, scale=1.0):
'''Extracts polygons as coordinates from xml file.
inputs:
xml_file: an opened xml file, eg. file = open(xml_filename, 'r')
scale: float that will be multiplied with the coordate in order to scale to a lower or bigger image
returns:
dictionary with tags as keys and polygon coordinates as values, eg {'Lesion malign': array([[x1,y1],[x2,y2],...])}
'''
i=2
polygon_coordinates = {}
xml_tree = ET.parse(xml_file)
root = xml_tree.getroot()
for child in root[0][0]:
attributes = child.attrib
tag = attributes['tags']
coordinates = []
for coord in child[0]:
x = float(coord.attrib['X'])
y = float(coord.attrib['Y'])
coordinates.append(np.array([x,y]))
if tag not in polygon_coordinates.keys():
polygon_coordinates[tag]= (np.array(coordinates)*scale).astype(int)
else:
polygon_coordinates[tag+'_'+str(i)]= (np.array(coordinates)*scale).astype(int)
i+=1
return polygon_coordinates
def create_masks_from_polygons(self, polygons, size, plot=True):
mask_dictionary = {class_: np.zeros(size) for class_ in self.tags}
for tag in polygons:
tag_edit = tag.lower().split('_')[0]
for class_, values in self.tags.items():
if tag_edit in values:
mask_dictionary[class_]=cv2.drawContours(mask_dictionary[class_], np.array([polygons[tag]]), -1, 1, -1).astype('uint8')
if plot:
rows = math.ceil(len(mask_dictionary)/2)
plt.figure(figsize=(20,20))
for i, items in enumerate(mask_dictionary.items()):
class_, mask = items
plt.subplot(rows,2,i+1)
plt.imshow(mask,cmap='gray')
plt.title(class_)
plt.show()
return mask_dictionary
def store_masks_from_xml_annotations(self, wsi_pyramid, img_levels=[0,6], show_masks=False):
'''main function to store masks based on a WSI's xml annotation file
inputs:
wsi_pyramid: dictionary containing pyvips objects of WSI on all magnification levls.
img_levels: the highest magnification level 0 cooresponds to the true size and the lowest 6 corresponds desired mask size
return:
will store a dictionary with masks based on tags and annotations
will also save masks as images'''
for file in self.xml_files:
if 'MSc_Benign' in file:
if self.wsi_name[:-5] in file :
xml_filename = file
elif 'SUShud51' in file:
if self.wsi_name[:-5] in file :
xml_filename = file
scale_max = self.magnification_levels[min(img_levels)]
scale_min = self.magnification_levels[max(img_levels)]
scale = wsi_pyramid[scale_min].width/wsi_pyramid[scale_max].width
file = open(self.xml_path+xml_filename, 'r')
polygons = self.extract_polygons(file, scale)
file.close()
size = (wsi_pyramid[scale_min].height,wsi_pyramid[scale_min].width)
dictionary = self.create_masks_from_polygons(polygons, size, plot=show_masks)
write_to_pickle(self.wsi_dataset_file_path+self.wsi_name[:-5]+'/', 'masks.obj', dictionary)
for mask_name, mask in dictionary.items():
cv2.imwrite(self.wsi_dataset_file_path+self.wsi_name[:-5]+'/'+mask_name+'.png', mask*255)
def main(background_segmentation,
create_masks_from_annotations,
extract_patches,
save_tiles_as_jpeg,
xml_path,
wsi_path):
preprocess = Preprocess(wsi_path=wsi_path, save_tiles_as_jpeg=save_tiles_as_jpeg)
preprocess.make_directories()
if background_segmentation:
foreground_background_segmentation = Foreground_background_segmentation()
if create_masks_from_annotations:
mask_from_annotations = Mask_from_annotations(xml_path = xml_path)
for wsi_name in os.listdir(preprocess.wsi_path):
if wsi_name in preprocess.skip_list:
continue
#if 'SUShud78 -' not in wsi_name:
# continue
print(wsi_name)
#load wsi
preprocess.wsi_name = wsi_name
preprocess.load_wsi()
if background_segmentation:
foreground_background_segmentation.create_tissue_mask(preprocess.image_dictionary, display_mask=False)
cv2.imwrite(preprocess.wsi_dataset_file_path+wsi_name[:-5]+'/tissue_mask.png', foreground_background_segmentation.mask_tissue*255)
write_to_pickle(preprocess.wsi_dataset_file_path+wsi_name[:-5]+'/', 'masks.obj', {'tissue': foreground_background_segmentation.mask_tissue})
if create_masks_from_annotations:
mask_from_annotations.wsi_name = wsi_name
mask_from_annotations.store_masks_from_xml_annotations(preprocess.image_dictionary, show_masks=True)
if extract_patches:
#Initiate dictionary containing top left coordinates of all valid tiles of all requested magninficaions
coordinate_dict = dict()
#Load mask dict
preprocess.load_mask_dict()
if not preprocess.mask_dict:
print('no masks fround from {}'.format(preprocess.wsi_dataset_file_path+preprocess.wsi_name+'/'))
continue
if preprocess.mask_hierarchy:
preprocess.remove_mask_overlap()
# Create folder for each WSI to store output
os.makedirs(preprocess.output_folder+preprocess.wsi_name[:-5]+'/', exist_ok=True)
if preprocess.save_tiles_as_jpeg:
os.makedirs(preprocess.extracted_tiles_folder + preprocess.wsi_name[:-5], exist_ok=True)
#start timer
current_wsi_start_time = time.time()
list_of_valid_tiles_from_current_wsi = []
i = 0
#decrease tau if the image is bigger than threshold
preprocess.dynamic_TAU_size(threshold = 3*10**9)
#calculate area ratios and other mearuerments between the different levels of the wsi_pyramid
preprocess.image_pyramid_measurements()
#calculate the patch criteria to find valid patches
preprocess.patch_criteria()
# Loop through each tissue class to fit tiles on
class_to_remove = []
region_masks = dict()
tissue_classes_to_fit_tiles_on = preprocess.tissue_classes_to_fit_tiles_on.copy()
for current_class_to_copy in tissue_classes_to_fit_tiles_on:
print('Now processing {} regions'.format(current_class_to_copy))
# Extract mask for current class
current_class_mask = preprocess.mask_dict[current_class_to_copy].copy()
if len(np.unique(current_class_mask))==1:
print('Mask for class {} is empty'.format(current_class_to_copy))
#tissue_classes_to_fit_tiles_on.remove(current_class_to_copy)
class_to_remove.append(current_class_to_copy)
continue
# Resize colormap to the size of tau overview image
current_class_mask = cv2.resize(current_class_mask, dsize=(preprocess.tau_image_width, preprocess.tau_image_height), interpolation=cv2.INTER_CUBIC)
print('Loaded segmentation mask with size {} x {}'.format(current_class_mask.shape[1], current_class_mask.shape[0]))
#Add mask of current class to region_masks dictionary
region_masks[current_class_to_copy]=current_class_mask
# Save the annotation mask image (If option is set to True)
if preprocess.save_binary_annotation_mask:
#annotation_mask_for_saving = current_class_mask * 255
cv2.imwrite(preprocess.output_folder + preprocess.wsi_name[:-5] + '/mask_{}.jpg'.format(current_class_to_copy), current_class_mask * 255, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
labels = find_regions(current_class_mask, preprocess.small_region_remove_threshold, preprocess.remove_small_regions)
# Get region properties
list_of_regions = regionprops(labels)
if preprocess.remove_small_regions:
print('\tFound {} regions (after removal of small regions)'.format(len(list_of_regions)))
# Create a new binary map after removing small objects
region_masks[current_class_to_copy] = np.zeros(shape=current_class_mask.shape)
#Extract all coordinates (to draw region on overview image)
list_of_valid_tiles_from_current_class = []
for current_region in list_of_regions:
#list_of_valid_tiles_from_current_class = []
#Draw region on mask
if preprocess.remove_small_regions:
region_masks[current_class_to_copy][current_region.coords[:,0],current_region.coords[:,1]]=1
#Create a box around region
box_coorinates = box_around_region(current_region)
#Find potential tile coordinates
all_coordinates = preprocess.possible_x_and_y_coordinates(box_coorinates)
# image = np.zeros((region_masks[current_class_to_copy].shape[0],region_masks[current_class_to_copy].shape[1],3))
# image[:,:,0][region_masks[current_class_to_copy]==1.0]=255
# image[:,:,1][region_masks[current_class_to_copy]==1.0]=255
# image[:,:,2][region_masks[current_class_to_copy]==1.0]=255
# for x in all_x_pos:
# for y in all_y_pos:
# cv2.rectangle(image, (x, y), (x+preprocess.tile_size, y+preprocess.tile_size), (255, 0, 0), 1)
# plt.imshow(image)
# plt.show()
#Find accepted tile coordinates based on tile criteria
list_of_valid_tiles_from_current_class += preprocess.find_valid_tiles(all_coordinates, region_masks[current_class_to_copy])
plt.imshow(region_masks[current_class_to_copy], cmap='gray')
plt.title('{} mask after removal of small areas'.format(current_class_to_copy))
plt.show()
tiles_before_removal = len(list_of_valid_tiles_from_current_class)
#remove tiles that only contain background. This is not needed for the tissue mask, as it has already had background segmentation
if current_class_to_copy == 'tissue':
remove_tiles = False
else:
remove_tiles = True
coordinate_dict, list_of_valid_tiles_from_current_class = preprocess.create_coordinate_dictionary(coordinate_dict,
list_of_valid_tiles_from_current_class,
current_class_to_copy,
remove_tiles=remove_tiles,
threshold = 205)
tiles_after_removal = len(list_of_valid_tiles_from_current_class)
print(tiles_before_removal-tiles_after_removal, ' tiles removed due to low variance')
# Add the tiles to the list of tiles of current wsi
list_of_valid_tiles_from_current_wsi.extend(list_of_valid_tiles_from_current_class)
for i in class_to_remove:
tissue_classes_to_fit_tiles_on.remove(i)
#Move on to next WSI if there are no masks
if len(tissue_classes_to_fit_tiles_on)==0:
print('No masks found for WSI {}'.format(wsi_name))
continue
# Save predicted coordinates dict as pickle
preprocess.save_coordinates(coordinate_dict)
# Save overview image
filename = preprocess.output_folder + preprocess.wsi_name[:-5] + '/image_clean.jpeg'
preprocess.image_dictionary[preprocess.tau].jpegsave(filename, Q=100)
# Read overview image again using cv2, and add alpha channel to overview image.
overview_jpeg_file = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
#overview_jpeg_file = mask_overlay(overview_jpeg_file, region_masks['benign'])
overview_jpeg_file = np.dstack([overview_jpeg_file, np.ones((overview_jpeg_file.shape[0], overview_jpeg_file.shape[1]), dtype="uint8") * 255])
#Convert masks from 0-1 -> 0-255 (can also be used to set the color)
for n in tissue_classes_to_fit_tiles_on:
region_masks[n] *= 255
# Create a empty alpha channel
shape_for_alpha_channel = region_masks[tissue_classes_to_fit_tiles_on[0]].shape
alpha_channel = np.zeros(shape=shape_for_alpha_channel).astype(region_masks[tissue_classes_to_fit_tiles_on[0]].dtype)
#n_tissue_classes = len(tissue_classes_to_fit_tiles_on)
# Each mask is 1-channel, merge them to create a 3-channel image (RGB), the order is used to set the color for each mask. Add a alpha-channel.
#Draw transparent masks over the overview image
region_masks = mask_overlay(region_masks, alpha_channel)
# Draw the selected regions on the overview image
for _, current_tissue_mask in region_masks.items():
overview_jpeg_file = cv2.addWeighted(current_tissue_mask, 1, overview_jpeg_file, 1.0, 0, dtype=cv2.CV_64F)
# Draw tiles on the overview image
preprocess.draw_tiles_on_image(overview_jpeg_file, list_of_valid_tiles_from_current_wsi)
#save overview image
preprocess.save_overview_image(overview_jpeg_file, len(list_of_valid_tiles_from_current_wsi))
# Calculate elapse time for current run
elapse_time = time.time() - current_wsi_start_time
m, s = divmod(elapse_time, 60)
h, m = divmod(m, 60)
model_time = '%02d:%02d:%02d' % (h, m, s)
# Print out results
print('Found {} tiles in image'.format(len(list_of_valid_tiles_from_current_wsi)))
print('Finished. Duration: {}'.format(model_time))
if __name__ == '__main__':
main(background_segmentation=False,
create_masks_from_annotations = False,
extract_patches = True,
save_tiles_as_jpeg = False,
#xml_path ='xml/',
xml_path = '/home/prosjekt/Histology/.xmlStorage/',
#wsi_path = 'WSI_all/')
wsi_path = '/home/prosjekt/Histology/Melanoma_SUS/MSc_Benign_Malign_HE/')