forked from bglnelissen/slideToolkit
-
Notifications
You must be signed in to change notification settings - Fork 1
/
slideExtract.py
executable file
·172 lines (146 loc) · 9.27 KB
/
slideExtract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python3
# Script to extract thumbnails and macro images from whole-slide image files (*.TIF, *.NDPI, etc.)
#
# Ref: https://github.com/choosehappy/Snippets/blob/master/extract_macro_level_from_wsi_image_openslide_cli.py
#
"""
slideExtract
This script is designed to extract thumbnails and macro-images from whole-slide image (WSI) files,
such as TIF or NDPI files. The script uses the OpenSlide library to handle WSI and OpenCV for image processing.
Usage:
python slideExtract.py -i/--input -l/--levels; optional: -d/--display -o/--outdir -s/--suffix -t/--type -f/--force -v/--verbose; for help: -h/--help
Example usage:
python slideExtract.py -i IMG012.ndpi -l m,6
Arguments:
--input, -i Input (directory containing files). Try: IMG012.ndpi (or *.TIF or /path_to/images/*.ndpi).
--levels, -l Comma separated list of magnification levels to extract, with 'm' as thumbnail and '6' as level 6. Try: m,6.
--display, -d Display the image. Optional. (Documented here, but the argument parser in main() does not define this option.)
--outdir, -o Output directory. Optional.
--suffix, -s Suffix to append to end of file, default is 'm' for thumbnail and '#' for a given level. Optional.
--type, -t Output file type, default is png (which is slower), other options are tif. Optional.
--force, -f Force output even if it exists. Optional.
--verbose, -v While writing images also display image properties. Optional.
--debug, -de Debug mode. Optional.
--version, -V Print version number and exit.
--help, -h Print help.
"""
# Release metadata, shown in the banner, the --version output and the help epilog.
VERSION_NAME = "slideExtract"
VERSION = "1.0.5"
VERSION_DATE = "2023-01-08"
COPYRIGHT = (
    "Copyright 1979-2024. Sander W. van der Laan | "
    "s.w.vanderlaan [at] gmail [dot] com | https://vanderlaanand.science."
)
# MIT license text. Adjacent literals are concatenated by the parser, so the
# value is one string; the trailing blanks before each newline are intentional
# and part of the text.
COPYRIGHT_TEXT = (
    '\n'
    'The MIT License (MIT). \n'
    '\n'
    'Permission is hereby granted, free of charge, to any person obtaining a copy of this software and \n'
    'associated documentation files (the "Software"), to deal in the Software without restriction, \n'
    'including without limitation the rights to use, copy, modify, merge, publish, distribute, \n'
    'sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is \n'
    'furnished to do so, subject to the following conditions: \n'
    '\n'
    'The above copyright notice and this permission notice shall be included in all copies \n'
    'or substantial portions of the Software. \n'
    '\n'
    'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, \n'
    'INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR \n'
    'PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS \n'
    'BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, \n'
    'TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE \n'
    'OR OTHER DEALINGS IN THE SOFTWARE. \n'
    '\n'
    'Reference: http://opensource.org.'
)
# import required packages
import numpy as np
import glob
from pathlib import Path
# for argument parser
import argparse
from argparse import RawTextHelpFormatter
import textwrap
# for openslide/openCV
import cv2
import os
import openslide
from openslide import *
# Define main function
def main():
    """Parse the command line and extract the requested images from each WSI file.

    For every input file, each entry of -l/--levels is written to
    ``<outdir>/<stem><suffix>.<level>.<type>``: ``m`` selects the associated
    image named "macro"; an integer selects that pyramid level, read in full
    from the origin. Existing output files are skipped unless -f/--force is
    given. When -o/--outdir is left at its default, each file's output goes to
    the directory that contains that input file.

    Exits through the parser with status 1 when -i/--input or -l/--levels is
    missing or when no input file matches, and with status 2
    (``parser.error``) when -l/--levels holds an entry that is neither ``m``
    nor an integer.
    """
    # The lines of the description start at column 0 on purpose: the text is
    # printed verbatim by RawTextHelpFormatter.
    parser = argparse.ArgumentParser(description=f'''
+ {VERSION_NAME} v{VERSION} +
slideExtract
This script is designed to extract thumbnails and macro-images from whole-slide image (WSI) files,
such as TIF or NDPI files. The script uses the OpenSlide library to handle WSI and OpenCV for image processing.
Usage:
python slideExtract.py -i/--input -l/--levels; optional: -de/--debug -o/--outdir -s/--suffix -t/--type -f/--force -v/--verbose; for help: -h/--help
Example usage:
python slideExtract.py -i IMG012.ndpi -l m,6
''',
        epilog=f'''
+ {VERSION_NAME} v{VERSION}. {COPYRIGHT} \n{COPYRIGHT_TEXT}+''',
        formatter_class=argparse.RawTextHelpFormatter)
    # -i and -l are validated by hand below (instead of required=True) so that
    # a friendlier message and the full help can be shown when they are missing.
    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument('-i', '--input', help="Input (directory containing files). Try: IMG012.ndpi (or *.TIF or /path_to/images/*.ndpi).", nargs="*")
    requiredNamed.add_argument('-l', '--levels', help="Comma separated list of magnification levels to extract, with 'm' as thumbnail and '6' as level 6. Try: m,6.")
    parser.add_argument('-o', '--outdir', help="Output dir, default is the directory of each input file.", default="<<SAME>>", type=str)
    parser.add_argument('-s', '--suffix', help="Suffix to append to end of file, default is 'm' for thumbnail and '#' for a given level.", default="", type=str)
    parser.add_argument('-t', '--type', help="Output file type, default is png (which is slower), other options are tif.", default="png", type=str)
    parser.add_argument('-f', '--force', help="Force output even if it exists.", default=False, action="store_true")
    parser.add_argument('-v', '--verbose', help="While writing images also display image properties.", default=False, action="store_true")
    parser.add_argument('-de', '--debug', help="Debug mode.", default=False, action="store_true")
    parser.add_argument('--version', '-V', action='version', version=f'%(prog)s {VERSION} ({VERSION_DATE}).')
    args = parser.parse_args()

    if not args.input or not args.levels:
        print("\nOh, computer says no! You must supply correct arguments when running a *** slideExtract ***!")
        print("Note that -i/--input and -l/--levels are required. Try: --input IMG012.ndpi (or *.TIF or /path_to/images/*.ndpi) --levels m,6.\n")
        parser.print_help()
        # Non-zero status so wrapping shell scripts can detect the usage error.
        parser.exit(status=1)

    # Parse the level list once, up front: a typo is reported before any slide
    # is opened, and the work is not repeated for every file.
    try:
        levels = _parse_levels(args.levels)
    except ValueError:
        parser.error(f"invalid -l/--levels value [{args.levels}]: every entry must be 'm' or an integer level, e.g. m,6.")
    if not levels:
        parser.error(f"invalid -l/--levels value [{args.levels}]: no levels given, e.g. m,6.")

    # Start the script
    print(f"+ {VERSION_NAME} v{VERSION} ({VERSION_DATE}) +")
    print("\nExtract thumbnails and macro images from WSI file(s).\n")

    if len(args.input) > 1:  # the shell expanded a wildcard into a list of files
        if args.verbose:
            print("Processing multiple files.")
        files = args.input
    else:  # a single (possibly quoted) pattern: expand it ourselves
        if args.verbose:
            print("Processing wildcard to find matching files or a specific file.")
        files = glob.glob(args.input[0])
    if not files:
        print(f"No files found matching [{args.input[0]}]; nothing to extract.")
        parser.exit(status=1)

    for fname in files:
        # Resolve the output directory per file. args.outdir is deliberately
        # NOT overwritten: the "<<SAME>>" default must keep meaning "next to
        # this input file" for every file in the list, not just the first.
        if args.outdir == "<<SAME>>":
            outdir = os.path.dirname(os.path.realpath(fname))
        else:
            outdir = args.outdir
        if args.verbose:
            print("Output directory set to [",outdir,"].\n")
        os.makedirs(outdir, exist_ok=True)
        _extract_levels(fname, levels, outdir, args)


def _parse_levels(levels_arg):
    """Turn the -l/--levels string into a list of ``'m'`` / ``int`` entries.

    Whitespace around entries and empty entries (e.g. a trailing comma) are
    ignored. Raises ValueError for an entry that is neither 'm' nor an integer.
    """
    levels = []
    for token in levels_arg.split(","):
        token = token.strip()
        if not token:
            continue
        levels.append(token if token == 'm' else int(token))
    return levels


def _extract_levels(fname, levels, outdir, args):
    """Write each requested level of the slide ``fname`` into ``outdir``.

    ``levels`` is the list built by ``_parse_levels``. From ``args`` (the
    parsed argparse namespace) the suffix, type, force, verbose and debug
    settings are read. Levels the slide cannot provide are reported and
    skipped so the remaining levels and files are still processed.
    """
    fname_base = Path(fname).stem
    fname_suffix = args.suffix if args.suffix else ''
    fname_type = args.type if args.type else ''

    # The context manager closes the slide handle even if a read or write fails.
    with openslide.OpenSlide(fname) as fimage:
        for level in levels:
            is_macro = level == 'm'
            kind = "thumbnail" if is_macro else "macro file"
            # Build the final path directly; substituting a "LEVEL" placeholder
            # would also rewrite that word if it occurred in the path or stem.
            fnameout = os.path.join(outdir, f"{fname_base}{fname_suffix}.{level}.{fname_type}")

            if args.verbose:
                if is_macro:
                    print("Processing [",fname,"] at thumbnail level (m).")
                else:
                    print("Processing [",fname,"] at level [",level,"].")

            if is_macro:
                if "macro" not in fimage.associated_images:
                    print(f"Skipping [{fname}]: the file has no associated 'macro' image.")
                    continue
            elif not 0 <= level < len(fimage.level_dimensions):
                print(f"Skipping [{fname}] at level [{level}]: the file has {len(fimage.level_dimensions)} level(s).")
                continue

            if args.debug:
                print(f">>> Debug. Checking if {kind} exists: ", fnameout)
            # Check for existing output BEFORE decoding pixels: reading a whole
            # level is the expensive step and is pointless for a skipped file.
            if not args.force and os.path.exists(fnameout):
                print(f"Skipping {kind} exists and --force is not set")
                continue

            if is_macro:
                img = fimage.associated_images["macro"]
            else:
                img = fimage.read_region((0, 0), level, fimage.level_dimensions[level])
            # Keep the first three channels only (drops an alpha channel if present).
            img = np.asarray(img)[:, :, 0:3]

            # Display information about the image
            # NOTE(review): the "Writing image" line is kept under --verbose like the
            # other progress messages; confirm this matches the intended nesting.
            if args.verbose:
                print('* image dimensions (height x width in pixels):', img.shape)
                img_size = img.size/1024  # element count / 1024, reported as KB
                print('* image size:', '{:,.2f}'.format(img_size), 'KB')
                print("Writing image for [",fname,"] at level [",level,"].\n")

            # Write the image; cv2.imwrite expects BGR channel order and signals
            # failure through its return value rather than an exception.
            if not cv2.imwrite(fnameout, cv2.cvtColor(img, cv2.COLOR_RGB2BGR)):
                print(f"Error: could not write image to [{fnameout}].")
if __name__ == "__main__":
    # Everything that produces output lives under the guard, so importing this
    # module (e.g. to reuse main()) prints nothing.
    main()
    # Print the version number and the license text as a closing trailer.
    print(f"\n+ {VERSION_NAME} v{VERSION} ({VERSION_DATE}). {COPYRIGHT} +")
    print(f"{COPYRIGHT_TEXT}")
# End of file