# portion-plan-quality-control.py
import numpy
import cv2
import os
import ctypes
import shutil
import subprocess
from subprocess import Popen, PIPE
import stat
import shlex
import cv2
from cv2 import cv
import random
import sets
# Root folders for the run.  Raw strings keep every backslash literal: the
# values are unchanged, but a sequence such as "\U" can no longer be read as
# the start of an escape sequence.
# INPUT_PATH is the directory tree that is walked for portion plan PDFs.
INPUT_PATH = r'C:\Users\control\Documents\Parishes'
# OUTPUT_PATH is deleted and recreated at the start of every run.
OUTPUT_PATH = r'C:\Users\control\Documents\Output'
# FUNCTIONS
def errexit(err, str):
    """Write a message to stderr and terminate the script.

    err -- exit status passed to sys.exit().
    str -- message text; a newline is appended.  The parameter name shadows
           the builtin but is kept so existing keyword callers still work.

    Raises SystemExit; this function never returns normally.
    """
    # Imported locally because the module's import block does not provide sys.
    import sys

    # Wrap in a tuple so a tuple-valued message is printed rather than being
    # unpacked as several format arguments.
    sys.stderr.write("%s\n" % (str,))
    sys.exit(err)
# http://stackoverflow.com/questions/7853628/how-do-i-find-an-image-contained-within-an-image
# Checks to see if image 1 exists within image 2
# if found, a red box is drawn on image 2 and output jpeg written to JPEG directory
# a confidence is also returned, as to whether or not image 1 was found
def find_image_and_write_jpeg(small_image_path, large_image_path, output_image_path, method_id):
    """Search for a template image inside a larger image and save the result.

    The best match is outlined with a red rectangle on the large image, and
    the annotated image is written to output_image_path.  One line of the
    form "<portion_plan>,<score>,<method name>" is appended to the log.
    Both portion_plan and results_file_object are module-level names that
    the calling script must have set before this function is invoked.

    small_image_path  -- path of the template image to look for.
    large_image_path  -- path of the image that is searched.
    output_image_path -- path the annotated copy of the large image is
                         written to.
    method_id         -- matching method selector; only 1
                         (CV_TM_SQDIFF_NORMED) is supported.

    Returns the minimum normalised squared difference found (smaller means a
    closer match).

    Raises ValueError for an unsupported method_id and IOError when either
    image cannot be read.
    """
    # Reject unknown selectors up front instead of failing later on an
    # unbound local variable.
    if method_id != 1:
        raise ValueError("Unsupported method_id: %r (only 1 is implemented)" % (method_id,))
    method = cv.CV_TM_SQDIFF_NORMED
    method_string = 'CV_TM_SQDIFF_NORMED'

    # Read the images from file.  cv2.imread signals failure by returning
    # None rather than raising, so check explicitly.
    small_image = cv2.imread(small_image_path)
    if small_image is None:
        raise IOError("Unable to read template image: %s" % small_image_path)
    large_image = cv2.imread(large_image_path)
    if large_image is None:
        raise IOError("Unable to read image: %s" % large_image_path)

    # Documented argument order is (image, template, method): the image being
    # searched comes first.
    result = cv2.matchTemplate(large_image, small_image, method)

    # For a squared-difference method the best match is the minimum.
    mn, _, mnLoc, _ = cv2.minMaxLoc(result)

    # Top-left corner of the best match.
    MPx, MPy = mnLoc

    results_file_object.write(portion_plan + ',' + str(mn) + ',' + method_string + '\n')

    # The match has the same size as the template.
    trows, tcols = small_image.shape[:2]

    # Outline the match in red (colour tuple is in BGR order) and save.
    cv2.rectangle(large_image, (MPx, MPy), (MPx + tcols, MPy + trows), (0, 0, 255), 2)
    cv2.imwrite(output_image_path, large_image)

    return mn
# Check input path exists
if not os.path.isdir(INPUT_PATH):
    errexit(1, "Specified path: %s not found" % INPUT_PATH)


def _recreate_directory(path):
    """Remove path if it exists, then create it again as an empty directory."""
    if os.path.exists(path):
        # Make the directory itself writable so it can be removed.
        os.chmod(path, stat.S_IWRITE)
        # Removal errors are ignored here; if anything is left behind the
        # os.mkdir below raises because the directory still exists.
        shutil.rmtree(path, ignore_errors=True)
    os.mkdir(path)


# Every run starts from an empty output directory.
_recreate_directory(OUTPUT_PATH)

# The working directory is a sibling of the output directory called
# 'Working'.  It is derived from the parent folder rather than by replacing
# the text 'Output', which would rewrite every occurrence in the path and do
# nothing at all when the output folder has a different name.
working_path = os.path.join(os.path.dirname(os.path.normpath(OUTPUT_PATH)), 'Working')
_recreate_directory(working_path)
# Location of the gdalinfo executable used to read the page size of each PDF.
_GDALINFO_EXE = r'C:\Program Files (x86)\GDAL\gdalinfo.exe'

# Template images searched for in greyscale and colour scans respectively.
_GRAY_TEMPLATE_PATH = 'C:\\PortionPlanQC\\Aaron Portion Plans\\CERTIFY6.tif'
_RGB_TEMPLATE_PATH = 'C:\\PortionPlanQC\\Aaron Portion Plans\\CERTIFY20.tif'


def _parse_gdalinfo_size(gdalinfo_lines):
    """Return (columns, rows) from gdalinfo output lines, or None if absent.

    Only a line that starts with "Size is" is considered, so other lines that
    merely contain the word "Size" cannot be mistaken for the raster size.
    """
    for metadata_line in gdalinfo_lines:
        stripped = metadata_line.strip()
        if stripped.startswith('Size is'):
            size = stripped.replace('Size is', '').replace(' ', '').split(',')
            try:
                return int(size[0]), int(size[1])
            except (IndexError, ValueError):
                return None
    return None


def _convert_pdf_to_tif(pdf_path, tif_path, resize):
    """Rasterise pdf_path to tif_path with ImageMagick's convert.

    The PDF is rendered at density 500, trimmed, and scaled by `resize`
    (for example '25%').
    http://www.imagemagick.org/script/convert.php
    """
    # shell=True is kept so that 'convert' is resolved by the command shell
    # exactly as before.
    # NOTE(review): presumably this relies on PATH ordering to find
    # ImageMagick rather than another program named convert -- confirm.
    subprocess.call(['convert',
                     '-colorspace', 'rgb',
                     '-density', '500',
                     pdf_path,
                     '-trim',
                     '-resize', resize,
                     tif_path], shell=True)


def _whiten_background(image, thresh):
    """Set the first three channels of image to 65535 wherever thresh > 220.

    image is modified in place; thresh must have the same row/column layout.
    """
    background = thresh > 220
    for band in range(3):
        # image[:, :, band] is a view, so the assignment updates image itself.
        image[:, :, band][background] = 65535


# Walk the input tree; every directory visited gets its own output folder
# (named after the directory) and its own log file.
for dirName, subdirList, parish_list in os.walk(INPUT_PATH):
    print('Found directory: %s' % dirName)

    # Last path component of the directory being visited.
    parish_string = dirName.split('\\')[-1]

    # Make output directories
    parish_output_path = os.path.join(OUTPUT_PATH, parish_string)
    tif_dir = os.path.join(parish_output_path, 'TIF')
    jpeg_dir = os.path.join(parish_output_path, 'JPEG')
    thresh_dir = os.path.join(parish_output_path, 'THRESH')
    os.mkdir(parish_output_path)
    os.mkdir(tif_dir)
    os.mkdir(jpeg_dir)
    os.mkdir(thresh_dir)

    results_file_name = os.path.join(parish_output_path, parish_string + '_log.txt')

    # results_file_object and portion_plan must stay module-level names:
    # find_image_and_write_jpeg reads both of them as globals.
    results_file_object = open(results_file_name, 'w+')
    try:
        for portion_plan in parish_list:
            # Only files with a .pdf extension are portion plans.
            plan_stem, plan_ext = os.path.splitext(portion_plan)
            if plan_ext.lower() != '.pdf':
                continue

            print(parish_string + ' ' + portion_plan)

            # Use the directory actually being walked, so files in the root
            # or in nested folders resolve to their real location.
            in_portion_plan_path = os.path.join(dirName, portion_plan)

            # Spaces and '&' are removed from the intermediate TIF file name
            # only; the directory part of the path is left untouched.
            tif_stem = plan_stem.replace(' ', '').replace('&', 'and')
            out_portion_plan_path_tif = os.path.join(tif_dir, tif_stem + '.tif')
            out_portion_plan_path_tif_resize = os.path.join(tif_dir, tif_stem + '_resize.tif')
            jpeg_output_path = os.path.join(jpeg_dir, plan_stem + '.jpeg')
            thresh_output_path = os.path.join(thresh_dir, plan_stem + '.tif')

            # Extract metadata from the PDF using gdalinfo.  Its output is
            # sent straight to a file in the working directory; no shell is
            # involved, so characters such as '&' in the file name are safe.
            gdalinfo_output_path = os.path.join(working_path, 'gdalinfo_output.txt')
            with open(gdalinfo_output_path, 'w') as gdalinfo_output:
                subprocess.call([_GDALINFO_EXE, in_portion_plan_path],
                                stdout=gdalinfo_output)
            with open(gdalinfo_output_path, 'r') as gdalinfo_output:
                gdalinfo = gdalinfo_output.readlines()

            # Extract image size from metadata.  Skip the plan rather than
            # silently reusing the size of the previous one.
            image_size = _parse_gdalinfo_size(gdalinfo)
            if image_size is None:
                print('Skipping ' + portion_plan + ': no image size found in gdalinfo output')
                continue
            nCols, nRows = image_size
            print('Size: ' + str(nCols) + ', ' + str(nRows))

            # Determine scanned DPI for foolscap standard size.
            # Standard portion plan dimensions are 216mm wide x 346mm high
            # (8.50in x 13.62in); 1 inch = 25.4mm.
            # Standard size: 1754px, 2480px, therefore
            # DPI = 1754 / (216 / 25.4) = 206.26, i.e. roughly 200 DPI.
            # NB: Stamp size 80mm wide x 21mm high.

            # Convert the PDF to TIF.
            _convert_pdf_to_tif(in_portion_plan_path, out_portion_plan_path_tif, '25%')

            # Resize the TIF to the original dimensions; the trailing '!'
            # tells convert to ignore the aspect ratio.
            subprocess.call(['convert',
                             out_portion_plan_path_tif,
                             '-resize', str(nCols) + 'x' + str(nRows) + '!',
                             out_portion_plan_path_tif_resize], shell=True)

            # Import the resized image as is, so its pixel type can be used
            # to choose the processing path.  (OR-ing the COLOR flag with
            # UNCHANGED yields UNCHANGED, so only that flag is passed.)
            img = cv2.imread(out_portion_plan_path_tif_resize,
                             cv2.CV_LOAD_IMAGE_UNCHANGED)
            if img is None:
                print('Skipping ' + portion_plan + ': could not read ' + out_portion_plan_path_tif_resize)
                continue

            # 8-bit images are treated as greyscale scans.
            if img.dtype == 'uint8':
                find_image_and_write_jpeg(_GRAY_TEMPLATE_PATH,
                                          out_portion_plan_path_tif_resize,
                                          jpeg_output_path,
                                          1)

            # 16-bit images are treated as colour (RGB) scans.
            elif img.dtype == 'uint16':
                # Delete tif & resized image; they were only needed to
                # determine the image type.
                os.remove(out_portion_plan_path_tif_resize)
                os.remove(out_portion_plan_path_tif)

                # Recreate the TIF at higher resolution.
                _convert_pdf_to_tif(in_portion_plan_path, out_portion_plan_path_tif, '50%')

                # Greyscale copy (flag 0) for thresholding, plus the image
                # loaded unchanged for masking.
                img2 = cv2.imread(out_portion_plan_path_tif, 0)
                img3 = cv2.imread(out_portion_plan_path_tif,
                                  cv2.CV_LOAD_IMAGE_UNCHANGED)
                if img2 is None or img3 is None:
                    print('Skipping ' + portion_plan + ': could not read ' + out_portion_plan_path_tif)
                    continue

                # Binary threshold: pixels brighter than 125 become 255.
                ret, thresh = cv2.threshold(img2, 125, 255, cv2.THRESH_BINARY)

                # Wherever the threshold image is bright, force the colour
                # image to full-scale white, then save the masked image.
                _whiten_background(img3, thresh)
                cv2.imwrite(thresh_output_path, img3)

                # Search the masked image for the colour template.
                find_image_and_write_jpeg(_RGB_TEMPLATE_PATH,
                                          thresh_output_path,
                                          jpeg_output_path,
                                          1)
    finally:
        # Close the log for this directory even if processing fails.
        results_file_object.close()