Skip to content
This repository has been archived by the owner on May 29, 2021. It is now read-only.

Feature/image dilation #16

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
55 changes: 50 additions & 5 deletions train_ocr/preprocess.py
@@ -1,6 +1,9 @@
import cv2
import logging
import os
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt

logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

Expand Down Expand Up @@ -38,7 +41,7 @@ def _preprocess_image(self, path):
"""
Read the image

Step 1: Convert image to grayscase
Step 1: Convert image to grayscale
Step 2: Threshold image to remove random jitter
Step 3: Remove vertical and horizontal lines

Expand All @@ -47,16 +50,58 @@ def _preprocess_image(self, path):
:return: N/A
"""
img = cv2.imread(path)
# TODO : Suchir to add image pre processing code here
# After all the steps are done, you need to write the
# processed image to sample*_processed.jpg

img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# adaptive Gaussian thresholding gives a much clearer result, with better detail, than adaptive mean

# tested adaptive threshold with block sizes: 11, 25, 33, 39, 41, 51
# quality increases between 11-33, and decreases from 33-51
# decided to go with adaptive thresholding after comparing it with
# linear thresholding and with the tozero and otsu filters:
# vertical and horizontal lines appear clearer with adaptive thresholding

# for the c-value, we tried: -7, 0, 1, 2, 3, 8, 10, 13
# 1 and 2 yielded the best results, with the most detail

thresh_img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 33, 1)

# Length of the line-detection kernel (long side of the structuring element)
L = 15
# Thickness of the line-detection kernel (short side of the structuring element)
T = 1
# Iterations
I = 2

# Detect horizontal lines (they are painted over further down)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (L, T))
detected_lines = cv2.morphologyEx(thresh_img, cv2.MORPH_OPEN, kernel, iterations=I)
h_cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
h_cnts = h_cnts[0] if len(h_cnts) == 2 else h_cnts[1]

# Detect vertical lines (they are painted over further down)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (T, L))
detected_lines = cv2.morphologyEx(thresh_img, cv2.MORPH_OPEN, kernel, iterations=I)
v_cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
v_cnts = v_cnts[0] if len(v_cnts) == 2 else v_cnts[1]

for c in h_cnts + v_cnts:
cv2.drawContours(img, [c], -1, (255, 255, 255), 4)

# keeping iterations = 1 because anything more dilates the image further
# (2,2) and above is way too dilated, losing detail
# decided to go with (2,1), but both (2,1) and (1,2) are very close
# dilating removes the clutter

kernel = np.ones((2,1),np.uint8)
dilated_img = cv2.dilate(img,kernel,iterations =1)
pass

def _process_state(self, state, path):
    """
    Preprocess every sample image stored directly inside a state's directory.

    Only regular files directly under ``path`` are processed; sub-directories
    are skipped.

    :param state: Name of the state; used only in the log message
    :param path: Directory that holds the sample images for this state
    :return: N/A
    """
    # Pass values as logging arguments instead of pre-formatting with '%':
    # formatting is deferred until a record is actually emitted, and a tuple
    # value can no longer break the format string.
    logging.info('Processing state: %s', state)

    # Build each full path once and snapshot the list of regular files before
    # any processing starts, so files created while processing are not picked up.
    candidates = (os.path.join(path, name) for name in os.listdir(path))
    sample_paths = [candidate for candidate in candidates if os.path.isfile(candidate)]

    for sample_path in sample_paths:
        logging.warning('Processing: %s', sample_path)
        self._preprocess_image(sample_path)

Expand Down