# sudoku_extractor.py

import numpy as np
import cv2
import operator
import matplotlib.pyplot as plt

def show_image(img):
    """Return ``img`` unchanged.

    This is a pass-through: nothing is displayed or modified, and the caller
    gets back the very object it passed in.
    """
    return img

def show_digits(digits, colour=255):
    """Tile the digit images into one 9x9 mosaic image.

    Every image in ``digits`` is copied and surrounded by a 1-pixel constant
    border of value ``colour``. The first 81 bordered images are then laid
    out nine per row (joined horizontally) and the nine rows are stacked
    vertically. The mosaic is passed through ``show_image`` and returned.
    """
    bordered = []
    for digit in digits:
        framed = cv2.copyMakeBorder(
            digit.copy(), 1, 1, 1, 1, cv2.BORDER_CONSTANT, None, colour
        )
        bordered.append(framed)

    # One horizontal strip per group of nine consecutive images.
    strips = [
        np.concatenate(bordered[start:start + 9], axis=1)
        for start in range(0, 81, 9)
    ]
    return show_image(np.concatenate(strips))

def pre_process_image(img, skip_dilate=False):
    """Blur, threshold and invert an image, optionally dilating the result.

    Steps, applied to a copy of ``img``:
      1. Gaussian blur with a 9x9 kernel.
      2. Adaptive Gaussian threshold (max value 255, block size 11, C = 2).
      3. Bitwise inversion, performed in place on the thresholded image.
      4. Unless ``skip_dilate`` is true, dilation with a 3x3 cross-shaped
         kernel.

    Returns the inverted image, or its dilated version when dilation runs.
    """
    blurred = cv2.GaussianBlur(img.copy(), (9, 9), 0)
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    # The destination argument is the source itself, so inversion is in place.
    inverted = cv2.bitwise_not(binary, binary)

    if skip_dilate:
        return inverted

    cross = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]], np.uint8)
    return cv2.dilate(inverted, cross)

def extract_corners(img):
    """Find the four corner points of the largest external contour in ``img``.

    Args:
        img: Image passed to ``cv2.findContours`` (a copy is used, so the
            caller's array is not modified).

    Returns:
        A list ``[top_left, top_right, bottom_right, bottom_left]`` where each
        entry is an ``(x, y)`` point taken from the largest-area contour.

    Raises:
        ValueError: If no contour is found in ``img``.
    """
    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contour list is second-to-last in both.
    contours = cv2.findContours(
        img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(contours) == 0:
        raise ValueError("no contours found in image; cannot locate grid corners")

    largest = max(contours, key=cv2.contourArea)
    points = largest.reshape(-1, 2)  # one (x, y) row per contour point

    # Image coordinates have y growing downwards, therefore:
    #   top_left     --> smallest x + y
    #   bottom_right --> largest  x + y
    #   top_right    --> largest  x - y
    #   bottom_left  --> smallest x - y
    sums = points[:, 0] + points[:, 1]
    diffs = points[:, 0] - points[:, 1]

    top_left = points[np.argmin(sums)]
    bottom_right = points[np.argmax(sums)]
    top_right = points[np.argmax(diffs)]
    bottom_left = points[np.argmin(diffs)]

    return [top_left, top_right, bottom_right, bottom_left]

def vector_dist(pt1, pt2):
    """Return the Euclidean distance between two 2-D points.

    Only indices 0 and 1 of each point are read.
    """
    delta_x = pt2[0] - pt1[0]
    delta_y = pt2[1] - pt1[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

def crop_warp(img, corners):
    """Warp the quadrilateral described by ``corners`` onto a square image.

    ``corners`` is read as ``[top_left, top_right, bottom_right,
    bottom_left]``. The output square's side is the longest of the four
    edges of that quadrilateral (truncated to an int for the output size).
    """
    tl, tr, br, bl = corners[0], corners[1], corners[2], corners[3]
    quad = np.array([tl, tr, br, bl], dtype='float32')

    # The longest edge of the quadrilateral sets the side of the output square.
    edge_lengths = [
        vector_dist(br, tr),
        vector_dist(tl, bl),
        vector_dist(br, bl),
        vector_dist(tl, tr),
    ]
    longest = max(edge_lengths)

    far = longest - 1
    target = np.array([[0, 0], [far, 0], [far, far], [0, far]], dtype='float32')

    matrix = cv2.getPerspectiveTransform(quad, target)
    out_side = int(longest)
    return cv2.warpPerspective(img, matrix, (out_side, out_side))

def infer_grid(img):
    """Split an image into a 9x9 grid of cell rectangles.

    The cell side is ``img.shape[0] / 9`` (the first dimension only is used).

    Returns:
        A list of 81 ``((x1, y1), (x2, y2))`` tuples giving each cell's
        top-left and bottom-right corner, ordered row by row and left to
        right within a row.
    """
    cell = img.shape[0] / 9
    return [
        ((col * cell, row * cell), ((col + 1) * cell, (row + 1) * cell))
        for row in range(9)
        for col in range(9)
    ]

def cut_from_rect(img, rect):
    """Return the sub-image of ``img`` bounded by ``rect``.

    ``rect`` is read as ``((x1, y1), (x2, y2))``; each coordinate is
    truncated with ``int`` and the result is ``img[y1:y2, x1:x2]``.
    """
    start, stop = rect[0], rect[1]
    x_from, y_from = int(start[0]), int(start[1])
    x_to, y_to = int(stop[0]), int(stop[1])
    return img[y_from:y_to, x_from:x_to]

def scale_and_centre(img, size, margin=0, background=0):
    """Scale ``img`` to fit a ``size`` x ``size`` square and pad it centrally.

    The longer side is scaled to ``size - margin`` (aspect ratio preserved),
    the image is padded with ``background`` on all sides, and the result is
    finally resized to exactly ``(size, size)``.

    Args:
        img: Source image; only its first two dimensions are inspected.
        size: Side length of the square output.
        margin: Total margin reserved along the longer side; half of it
            (truncated) goes on each end.
        background: Constant value used for the padding.

    Returns:
        The scaled and padded image with shape ``size`` x ``size``.
    """
    h, w = img.shape[:2]

    def centre_pad(length):
        """Return the two paddings that centre ``length`` inside ``size``."""
        side1 = int((size - length) / 2)
        # An odd length gets the extra pixel on the second side.
        side2 = side1 if length % 2 == 0 else side1 + 1
        return side1, side2

    def scale(r, x):
        # Clamp to 1: a very thin feature would otherwise truncate to 0 and
        # cv2.resize rejects a zero-sized target dimension.
        return max(1, int(r * x))

    if h > w:
        # Taller than wide: height spans size - margin, width is centred.
        t_pad = int(margin / 2)
        b_pad = t_pad
        ratio = (size - margin) / h
        w, h = scale(ratio, w), scale(ratio, h)
        l_pad, r_pad = centre_pad(w)
    else:
        # Wider than tall (or square): width spans size - margin.
        l_pad = int(margin / 2)
        r_pad = l_pad
        ratio = (size - margin) / w
        w, h = scale(ratio, w), scale(ratio, h)
        t_pad, b_pad = centre_pad(h)

    img = cv2.resize(img, (w, h))
    img = cv2.copyMakeBorder(img, t_pad, b_pad, l_pad, r_pad, cv2.BORDER_CONSTANT, None, background)
    # The paddings do not always add up to exactly `size`; this final resize
    # forces the output shape.
    return cv2.resize(img, (size, size))

def find_largest_feature(inp_img, scan_tl=None, scan_br=None):
    """Isolate the largest connected region of 255-valued pixels.

    Works on a copy of ``inp_img``. Regions of value 255 whose seed lies in
    the scan window are flood-filled one by one; the one with the largest
    filled area is kept (restored to 255) and everything else that was 255
    ends up as 0.

    Args:
        inp_img: 2-D image whose feature pixels have the value 255.
        scan_tl: ``[x, y]`` top-left corner of the window searched for seed
            points. Defaults to ``[0, 0]``.
        scan_br: ``[x, y]`` bottom-right corner (exclusive) of that window.
            Defaults to ``[width, height]``. The window is clamped to the
            image bounds.

    Returns:
        A tuple ``(img, bbox, seed_point)``:
          * ``img`` - the processed copy of the input;
          * ``bbox`` - float32 array ``[[left, top], [right, bottom]]`` of the
            remaining 255 pixels; when none remain it is
            ``[[width, height], [0, 0]]``;
          * ``seed_point`` - ``(x, y)`` seed of the largest region, or
            ``(None, None)`` when no 255 pixel was found in the window.
    """
    img = inp_img.copy()
    height, width = img.shape[:2]

    max_area = 0
    seed_point = (None, None)

    if scan_tl is None:
        scan_tl = [0, 0]

    if scan_br is None:
        scan_br = [width, height]

    # Clamp the window before indexing: a bounds test placed after
    # img.item(y, x) cannot prevent the out-of-range access.
    x_start, y_start = max(scan_tl[0], 0), max(scan_tl[1], 0)
    x_stop, y_stop = min(scan_br[0], width), min(scan_br[1], height)

    # Pass 1: grey out (64) every region seeded inside the window and
    # remember the seed of the one with the largest filled area.
    for x in range(x_start, x_stop):
        for y in range(y_start, y_stop):
            if img.item(y, x) == 255:
                area = cv2.floodFill(img, None, (x, y), 64)
                if area[0] > max_area:
                    max_area = area[0]
                    seed_point = (x, y)

    # Pass 2: grey out whatever is still 255 anywhere in the image.
    for x in range(width):
        for y in range(height):
            if img.item(y, x) == 255:
                cv2.floodFill(img, None, (x, y), 64)

    # The mask is 2 pixels wider and taller than the image.
    mask = np.zeros((height + 2, width + 2), np.uint8)

    # Restore the largest region to 255.
    if all(p is not None for p in seed_point):
        cv2.floodFill(img, mask, seed_point, 255)

    top, bottom, left, right = height, 0, width, 0

    # Pass 3: blank the remaining grey regions and measure the bounding box
    # of the pixels that are 255.
    for x in range(width):
        for y in range(height):
            if img.item(y, x) == 64:
                cv2.floodFill(img, mask, (x, y), 0)

            if img.item(y, x) == 255:
                top = y if y < top else top
                bottom = y if y > bottom else bottom
                left = x if x < left else left
                right = x if x > right else right

    bbox = [[left, top], [right, bottom]]
    return img, np.array(bbox, dtype='float32'), seed_point


def extract_digit(img, rect, size):
    """Extract the digit found in one grid cell as a ``size`` x ``size`` image.

    The cell ``rect`` is cut out of ``img``. The largest feature is then
    searched for with seed points restricted to a central window whose
    margin is ``mean(height, width) / 2.5`` on every side, and the cell is
    cropped to that feature's bounding box.

    Returns the feature scaled and centred (margin 4) when its bounding box
    has positive width and height and an area above 100; otherwise an
    all-zero ``uint8`` image of shape ``(size, size)``.
    """
    cell = cut_from_rect(img, rect)
    cell_h, cell_w = cell.shape[:2]
    inset = int(np.mean([cell_h, cell_w]) / 2.5)

    _, bbox, _ = find_largest_feature(
        cell, [inset, inset], [cell_w - inset, cell_h - inset]
    )
    feature = cut_from_rect(cell, bbox)

    box_w = bbox[1][0] - bbox[0][0]
    box_h = bbox[1][1] - bbox[0][1]

    # Reject empty or tiny boxes: they are treated as "no digit in this cell".
    if box_w <= 0 or box_h <= 0 or (box_w * box_h) <= 100 or len(feature) == 0:
        return np.zeros((size, size), np.uint8)
    return scale_and_centre(feature, size, 4)


def get_digits(img, squares, size):
    """Extract one digit image per square.

    A copy of ``img`` is pre-processed without dilation, then
    ``extract_digit`` is applied to every rectangle in ``squares``.

    Returns a list of images in the same order as ``squares``.
    """
    prepared = pre_process_image(img.copy(), skip_dilate=True)
    return [extract_digit(prepared, cell, size) for cell in squares]


def parse_grid(path):
    """Run the full extraction pipeline on the image stored at ``path``.

    The image is loaded as greyscale, pre-processed, the corners of the
    largest contour are located, the grid is warped to a square, divided
    into 9x9 cells, and a 28x28 digit image is extracted for every cell.

    Args:
        path: Location of the image file to read.

    Returns:
        A single mosaic image of the 81 extracted digit images.

    Raises:
        OSError: If the image at ``path`` cannot be read.
    """
    original = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than deep inside the pipeline.
    if original is None:
        raise OSError("could not read image: {!r}".format(path))

    processed = pre_process_image(original)
    corners = extract_corners(processed)
    cropped = crop_warp(original, corners)
    squares = infer_grid(cropped)
    digits = get_digits(cropped, squares, 28)
    final_image = show_digits(digits)
    return final_image

def extract_sudoku(image_path):
    """Return the digit mosaic produced by ``parse_grid`` for ``image_path``."""
    return parse_grid(image_path)