Initial commit

- find receipt
 - find lines
This commit is contained in:
Caesar2011
2019-03-16 17:06:40 +01:00
commit 8626a2db01
33 changed files with 1295 additions and 0 deletions

203
.gitignore vendored Normal file
View File

@@ -0,0 +1,203 @@
# custom
result/*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

4
.idea/encodings.xml generated Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
</project>

View File

@@ -0,0 +1,28 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="PIL" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E501" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="int.dot" />
</list>
</option>
</inspection_tool>
</profile>
</component>

7
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (/usr/bin/python3.5)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/receipt-recognition.iml" filepath="$PROJECT_DIR$/.idea/receipt-recognition.iml" />
</modules>
</component>
</project>

6
.idea/other.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW" value="true" />
</component>
</project>

13
.idea/receipt-recognition.iml generated Normal file
View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/data" />
</content>
<orderEntry type="jdk" jdkName="Python 3.5.2 (/usr/bin/python3.5)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>

6
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

0
README.md Normal file
View File

BIN
data/receipt-01.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

BIN
data/receipt-02.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

BIN
data/receipt-03.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 754 KiB

BIN
data/receipt-04.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 331 KiB

BIN
data/receipt-05.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 156 KiB

BIN
data/receipt-06.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 868 KiB

BIN
data/receipt-07.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

BIN
data/receipt-08.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 MiB

BIN
data/receipt-09.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 940 KiB

BIN
data/receipt-10.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

BIN
data/receipt-11.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

BIN
data/receipt-12.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

20
main.py Normal file
View File

@@ -0,0 +1,20 @@
import glob
import os
from src.processing.loader import load_image, save_image
from src.processing.linefinder import find_lines, preparation
from matplotlib import pyplot as plt
from src.processing.receiptcutter import cut_receipt
# Walk the sample directory and run the whole pipeline (receipt cut-out, then
# text-line extraction) on the selected sample, showing every result.
for root, dirs, files in os.walk("data"):
    for file in files:
        # Only the "receipt-08" sample is processed; everything else is skipped.
        if not file.startswith("receipt-08"):
            continue
        # Join with the directory os.walk is currently visiting, so that files
        # found in sub-directories of "data" resolve to their real location.
        image = load_image(os.path.join(root, file))
        receipt = cut_receipt(image, draw_steps=True)
        plt.imshow(receipt)
        plt.show()
        lines = find_lines(receipt)
        for line in lines:
            plt.imshow(line, cmap="gray")
            plt.show()

6
requirements.txt Normal file
View File

@@ -0,0 +1,6 @@
imageio
Pillow
scikit-image
scipy
numpy
matplotlib

0
src/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,15 @@
import numpy as np
def rgb2gray(image):
    """Collapse an RGB or RGBA image into a single weighted brightness channel.

    The colour channels are weighted with 0.2627 / 0.6780 / 0.0593 and the
    result is divided by 255. When the third axis has four entries the last
    one gets weight zero, so it never contributes.

    :param image: array whose third axis holds 3 or 4 channels
    :return: 2-D array with one weighted value per pixel
    """
    weights = [0.2627, 0.6780, 0.0593]
    if image.shape[2] == 4:
        # Fourth channel (presumably alpha) must not influence the brightness.
        weights.append(0)
    return image.dot(np.array(weights) / 255)
def rgb2gray_value(image):
    """Return, per pixel, the spread between the largest and smallest colour channel.

    Only the first three channels are considered. Both extremes are divided
    by 255 before they are subtracted, so a pixel whose channels are all
    equal yields 0.

    :param image: array with at least three channels on its third axis
    :return: 2-D array of ``max/255 - min/255`` per pixel
    """
    rgb = image[:, :, :3]
    brightest = rgb.max(axis=2) / 255
    darkest = rgb.min(axis=2) / 255
    return brightest - darkest

View File

@@ -0,0 +1,166 @@
import numpy as np
from scipy.ndimage import measurements
from src.processing.imageprocessing import rgb2gray
from src.processing.loader import load_numpy, save_numpy, save_image, load_image
from src.utils.cmap_generator import rand_cmap, list_cmap
from matplotlib import pyplot as plt
def find_lines(image):
    """Split a cropped receipt image into one grayscale image per text line.

    The image is binarised and trimmed by ``preparation``, the binary image is
    shown, seams are traced through it, the seam-free regions are grouped and
    finally turned into line images by ``generate_lines``.

    Intermediate results are written to the ``result`` directory:
    ``backtrack.npz`` (seam back-pointers, reused on later runs) and
    ``gray.png``.

    :param image: RGB(A) image array as accepted by ``preparation``
    :return: list of 2-D arrays, as returned by ``generate_lines``
    """
    # Function-scope import: this module does not import os at file level.
    import os

    gray, binary, magnitude = preparation(image)
    plt.imshow(binary, cmap="gray")
    plt.show()

    # The result directory is git-ignored and therefore missing on a fresh
    # checkout; saving the cache would fail without it.
    os.makedirs("result", exist_ok=True)

    backtrack = load_numpy("result/backtrack.npz")
    # A cache written for a differently sized image cannot describe this one;
    # recompute instead of indexing with mismatching back-pointers.
    # NOTE(review): a cache from another image of identical size is still
    # accepted - delete result/backtrack.npz when switching inputs.
    if backtrack is None or backtrack.shape != binary.shape:
        _, backtrack = minimum_seam(binary, magnitude)
        save_numpy("result/backtrack.npz", backtrack)
    save_image("result/gray.png", gray)

    seams = calculate_seams(backtrack)
    labeled, ncomponents = group_empty_boxes(seams)
    return generate_lines(labeled, ncomponents, gray)
def _bright_span(bright_fraction, min_fraction=0.6):
    """Return ``(start, stop)`` spanning the first to the last entry >= min_fraction."""
    hits = np.flatnonzero(bright_fraction >= min_fraction)
    if hits.size == 0:
        # Nothing qualifies: keep the full extent instead of scanning past the
        # end of the array (the former while-loops raised IndexError here).
        return 0, bright_fraction.shape[0]
    return int(hits[0]), int(hits[-1]) + 1


def preparation(image):
    """Binarise an image and trim dark borders on all four sides.

    The image is converted with ``rgb2gray``, a threshold is derived from its
    256-bin histogram via ``get_threshold`` (scaled by 0.96), and the borders
    are cut back to the first/last column and row in which at least 60 % of
    the pixels are above the threshold. If no column (or row) reaches 60 %,
    that axis is left untrimmed.

    :param image: RGB(A) image array as accepted by ``rgb2gray``
    :return: tuple ``(gray, binary, magnitude)`` cropped to the same window;
        ``binary`` is 1 where the pixel is brighter than the threshold and
        ``magnitude`` is its complement (1 on dark pixels)
    """
    gray = rgb2gray(image)
    cnt, _ = np.histogram(gray, 256)
    # NOTE(review): np.histogram spans [gray.min(), gray.max()], while the bin
    # index is converted below as if the bins covered [0, 1] - confirm that
    # this is intended for low-contrast input.
    threshold = get_threshold(cnt)/256*0.96
    binary = (gray > threshold).astype(np.int_)
    magnitude = np.ones_like(binary)-binary

    # Fraction of bright pixels per column / per row of the untrimmed image.
    imin, imax = _bright_span(binary.sum(axis=0) / binary.shape[0])
    jmin, jmax = _bright_span(binary.sum(axis=1) / binary.shape[1])
    return gray[jmin:jmax, imin:imax], binary[jmin:jmax, imin:imax], magnitude[jmin:jmax, imin:imax]
def get_threshold(hist, thresh=None):
    """Find a binarisation threshold (bin index) for a histogram.

    ISO data algorithm
    https://felixniklas.com/imageprocessing/binarization

    Starting from ``thresh`` (the middle bin when omitted), the threshold is
    repeatedly replaced by the truncated midpoint of the two values that
    ``median`` returns for the bins below and from the threshold upwards,
    until it no longer changes.

    :param hist: 1-D array of bin counts
    :param thresh: optional starting bin index
    :return: the stable bin index, passed through ``np.round``
    """
    if thresh is None:
        thresh = hist.shape[0] // 2
    while True:
        lower = median(hist, 0, thresh)
        upper = median(hist, thresh, hist.shape[0])
        candidate = int((lower + upper) / 2)
        if candidate == thresh:
            return np.round(candidate)
        thresh = candidate
def median(values, start, stop):
    """Return the count-weighted average index of ``values[start:stop]``.

    Despite the name this is a weighted mean: every index is weighted by the
    value stored at it. When the window sums to zero the function returns
    ``start``, except for ``start == 0`` where it returns ``stop + 1``.

    :param values: sequence of weights (e.g. histogram counts)
    :param start: first index of the window (inclusive)
    :param stop: end index of the window (exclusive)
    :return: weighted average index, or the fallback described above
    """
    window = values[start:stop]
    total = sum(window)
    if total == 0:
        return start if start != 0 else stop+1
    weighted = sum(idx * val for idx, val in enumerate(window, start=start))
    return weighted / total
def minimum_seam(img, energy_map):
    """Accumulate left-to-right seam costs and record the chosen predecessor.

    Columns are processed from left to right. Every cell looks at its up to
    three neighbours in the previous column (row above, same row, row below),
    takes the one with the smallest accumulated cost and adds that cost to
    its own energy. Moving to a different row costs one extra unit, which is
    added after the neighbour has been chosen.

    :param img: array used only for its ``(rows, cols)`` shape
    :param energy_map: per-pixel energy with the same shape; not modified
    :return: tuple ``(M, backtrack)`` - accumulated cost map and, for every
        cell, the row index of its predecessor in the previous column
        (column 0 stays zero)
    """
    r, c = img.shape
    M = energy_map.copy()
    # The np.int alias was removed in NumPy 1.24; the builtin int selects the
    # same default integer dtype.
    backtrack = np.zeros_like(M, dtype=int)
    for j in range(1, c):
        for i in range(0, r):
            # Handle the top edge of the image, to ensure we don't index -1
            if i == 0:
                idx = np.argmin(M[i:i+2, j-1])
                backtrack[i, j] = idx + i
                min_energy = M[idx+i, j-1]
                if idx > 0:
                    min_energy += 1
            else:
                m = M[i-1:i+2, j-1]
                # Rolling by two puts the same-row entry first, so argmin
                # prefers it on ties; the modulo maps the position back.
                # (On the last row the window has only two entries and the
                # roll is a no-op, so the upper row wins ties there.)
                idx = (np.argmin(np.roll(m, 2)) - 2) % len(m)
                backtrack[i, j] = idx + i - 1
                min_energy = M[idx+i-1, j-1]
                if idx != 1:
                    min_energy += 1
            M[i, j] += min_energy
    return M, backtrack
def calculate_seams(links):
    """Count how many seams pass through every cell.

    One seam starts in every row of the last column. Walking the columns from
    right to left, each cell hands its seam count to the row that ``links``
    names as its predecessor in the column to the left.

    :param links: integer array; ``links[y, x]`` is the predecessor row of
        cell ``(y, x)`` in column ``x - 1``
    :return: array of the same shape and dtype holding the seam counts
    """
    width = links.shape[1]
    seams = np.zeros_like(links)
    seams[:, width - 1] = 1
    for col in range(width - 1, 0, -1):
        # Unbuffered scatter-add: rows that share a predecessor accumulate.
        np.add.at(seams[:, col - 1], links[:, col], seams[:, col])
    return seams
def group_empty_boxes(seams):
    """Label the connected regions that no seam passes through.

    Cells with a seam count of zero are grouped with 4-connectivity
    (horizontal and vertical neighbours only).

    :param seams: integer array of seam counts per cell
    :return: tuple ``(labeled, ncomponents)`` - label image (0 where a seam
        passes) and the number of regions found
    """
    # Function-scope import of the public entry point; the
    # scipy.ndimage.measurements namespace is deprecated.
    from scipy.ndimage import label

    clouds = 1 - np.minimum(seams, 1)
    # The np.int alias was removed in NumPy 1.24; use the builtin int.
    structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=int)
    labeled, ncomponents = label(clouds, structure)
    return labeled, ncomponents
def generate_lines(labeled, ncomponents, gray):
# Turn the labelled seam-free regions into a list of grayscale line images.
#   labeled     - label image (0 = background, 1..ncomponents = regions)
#   ncomponents - number of regions in `labeled`
#   gray        - grayscale image of the same shape the crops are taken from
# Returns the list `entries`; each element is a crop of `gray` in which every
# pixel outside the collected regions is replaced by the crop's maximum value.
# Side effects: shows the label image and writes it to "result/groups.png".
# NOTE(review): indentation is not preserved in this view, so the nesting of
# the statements inside the loop below cannot be confirmed from here - verify
# against the real file before relying on the comments about control flow.
plt.imshow(labeled, cmap=rand_cmap(ncomponents, type='hard', first_color_black=True, last_color_black=False, verbose=False))
plt.show()
plt.imsave("result/groups.png", labeled, cmap=rand_cmap(ncomponents, type='hard', first_color_black=True, last_color_black=False, verbose=False))
# `groups` and `colors` are filled below but are only read by the
# commented-out debugging code at the end of the function.
groups = np.copy(labeled)
group_id = 1
entries = []
# Pixels collected for the line currently being assembled (None = empty).
pixelgroup = None
in_top_mode = True
# After these two steps indices[row, col] holds the pair (col, row).
indices = np.indices(labeled.shape).T[:, :, [1, 0]]
indices = np.swapaxes(indices, 0, 1)
colors = [[0, 0, 0]]
for label in range(1, ncomponents+1):
# (col, row) pairs of every pixel carrying this label.
pixel = indices[labeled == label]
# minp is computed but not used afterwards.
minp = np.min(pixel, axis=0)
maxp = np.max(pixel, axis=0)
# Row coordinates of the region's right-most column and of the column just
# left of it; comparing them tells in which direction the region's right
# end moves.
right_pixel = pixel[pixel[:, 0] == maxp[0]][:, 1]
second_pixel = pixel[pixel[:, 0] == maxp[0]-1][:, 1]
color = [0, 0, 1]
if second_pixel.size > 0 and min(right_pixel) > min(second_pixel):
# up
color[0] = 1
if not in_top_mode:
# Bounding box of everything collected so far.
minps = np.min(pixelgroup, axis=0)
maxps = np.max(pixelgroup, axis=0)
# Skip when the current region has fewer than 20 pixels or the collected
# box is less than 5 pixels wide or high.
if pixel.shape[0] < 20 or maxps[1]-minps[1] < 5 or maxps[0]-minps[0] < 5:
continue
entry = gray[minps[1]:maxps[1]+1, minps[0]:maxps[0]+1]
# Shift the collected coordinates into the crop's own coordinate frame.
pixelgroup = np.subtract(pixelgroup, minps)
# Start from a canvas filled with the crop's brightest value and copy over
# only the collected pixels.
white = np.ones_like(entry) * np.max(entry)
white[pixelgroup[:, 1], pixelgroup[:, 0]] = entry[pixelgroup[:, 1], pixelgroup[:, 0]]
entries.append(white)
pixelgroup = None
group_id += 1
in_top_mode = True
if second_pixel.size > 0 and max(right_pixel) < max(second_pixel):
# down
color[1] = 1
in_top_mode = False
groups[labeled == label] = group_id
# Add this region's pixels to the line being assembled.
if pixelgroup is None:
pixelgroup = pixel[:, :]
else:
pixelgroup = np.concatenate((pixelgroup, pixel))
colors.append(color)
#plt.imsave("result/groups_types.png", labeled,
# cmap=list_cmap(np.array(colors)))
#g = np.array(load_image("result/groups_types.png")[:, :, :3], dtype="float")
#b = load_image("result/gray.png")
#t = (g[:, :, :3] + np.tile(b[:, :], (3, 1, 1)).swapaxes(2, 0).swapaxes(1, 0) * 1) / 2
#t = (g[:, :, :3] + np.tile(b[:, :], (3, 1, 1)).swapaxes(2, 0).swapaxes(1, 0) * 1) / 2
#save_image("result/combined_types.png", t/255)
#plt.imsave("result/combined_new.png", groups,
# cmap=rand_cmap(500, type='hard', first_color_black=True, last_color_black=False, verbose=False))
return entries

24
src/processing/loader.py Normal file
View File

@@ -0,0 +1,24 @@
import os
import imageio
import numpy as np
def load_image(path):
    """Read the image file at ``path`` with imageio and return the result."""
    return imageio.imread(path)
def save_image(path, image):
    """Convert ``image`` to a NumPy array and write it to ``path`` with imageio."""
    imageio.imsave(path, np.array(image))
def save_numpy(path, array):
    """Store ``array`` in an ``.npz`` archive under the key ``"array"``.

    ``load_numpy`` reads the archive back using the same key.
    """
    payload = {"array": array}
    np.savez(path, **payload)
def load_numpy(path):
    """Load the array stored under the key ``"array"`` in an ``.npz`` archive.

    :param path: location of an archive written by ``save_numpy``
    :return: the stored array, or ``None`` when ``path`` is not an existing file
    """
    if not os.path.isfile(path):
        return None
    # np.load keeps the archive's file handle open until the returned object
    # is closed; the context manager releases it once the array is read.
    with np.load(path) as data:
        return data['array']

View File

@@ -0,0 +1,358 @@
from collections import defaultdict
from skimage.transform import resize
from scipy.ndimage import gaussian_filter
from matplotlib import pyplot as plt
from src.processing.imageprocessing import rgb2gray_value
from scipy import signal
import numpy as np
from PIL import Image
class Line:
    """A straight line ``y = slope * x + intercept`` with its supporting data.

    A ``slope`` of ``None`` marks a vertical line; ``intercept`` then holds
    its x position. ``points`` are the ``[y, x]`` pixels that voted for the
    line and ``splits`` collects ``(y, x, other_line_index)`` tuples.
    """

    def __init__(self, intercept=0, slope=0, points=None):
        self.intercept = intercept
        self.slope = slope
        # A fresh list per instance unless the caller supplies one.
        self.points = points if points is not None else []
        self.splits = []

    def __str__(self):
        return "m={!s};n={!s};split={!s}".format(self.slope, self.intercept, self.splits)
def cut_receipt(image, draw_steps=False):
    """Locate the receipt in a photo and return it cropped and rectified.

    Pipeline: down-scale and blur, gradient computation, edge thinning,
    Hough line search, conversion to full-resolution lines, segment
    splitting/filtering and finally the search for the largest quadrilateral.

    :param image: image array; only the first three channels are used
    :param draw_steps: when true, the detected lines and the chosen
        quadrilateral are displayed with matplotlib
    :return: the cropped receipt, or the (three-channel) input image when no
        suitable quadrilateral was found
    """
    # Hough params
    hough_opts = {"theta_res": 5, "width_res": 5}

    image = image[:, :, :3]
    gray, scale = prepare_image(image)
    grad_strength, grad_angle = sobel_edges(gray)
    edges = canny(grad_strength, grad_angle)
    hough, references = hough_lines(edges, **hough_opts)
    if draw_steps:
        draw_hough_lines(image, scale, hough, references, edges.shape, **hough_opts)

    lines = convert_to_lines(scale, hough, references, edges.shape, **hough_opts)
    lines = split_segments(lines, image.shape)
    lines = find_important_segments(lines, scale)
    _, corners = find_largest_rectangle(lines, image.shape)

    if corners is None:
        return image
    if draw_steps:
        draw_rectangle(image, corners)
    return crop_image(image, corners)
def prepare_image(image):
    """Build the small, blurred single-channel image used for edge detection.

    The channel-spread image from ``rgb2gray_value`` is resized to a height
    of 500 rows (keeping the aspect ratio) and smoothed with a Gaussian whose
    sigma is ``max(3, scale * 2 - 8)``.

    :param image: full-resolution image array
    :return: tuple ``(gray, scale)`` where ``scale`` is the ratio of the
        original height to the resized height
    """
    spread = rgb2gray_value(image)
    rows, cols = spread.shape[0], spread.shape[1]
    target_shape = (500, int(cols / rows * 500))
    small = resize(spread, target_shape, mode="reflect")
    scale = image.shape[0] / small.shape[0]
    sigma = max(3, scale * 2 - 8)
    return gaussian_filter(small, sigma=sigma), scale
def sobel_edges(gray):
    """Compute gradient magnitude and direction of a single-channel image.

    Both derivatives are central differences (kernel ``[-2, 0, 2]`` along one
    axis) evaluated with symmetric boundary handling.

    :param gray: 2-D float array
    :return: tuple ``(grad_strength, grad_angle)``; the angle is
        ``arctan(gray_y / gray_x)`` in ``[-pi/2, pi/2]``. Where the horizontal
        derivative is zero the angle is ``+/-pi/2`` following the sign of the
        vertical derivative, and 0 where both derivatives vanish.
    """
    sobely = np.array([[0, 2, 0], [0, 0, 0], [0, -2, 0]], dtype='float')
    gray_y = signal.convolve2d(gray, sobely, boundary='symm', mode='same')
    sobelx = np.array([[0, 0, 0], [-2, 0, 2], [0, 0, 0]], dtype='float')
    gray_x = signal.convolve2d(gray, sobelx, boundary='symm', mode='same')
    grad_strength = np.sqrt(np.square(gray_y)+np.square(gray_x))

    # A masked division without an explicit output buffer leaves the masked
    # entries uninitialised, so pixels with gray_x == 0 used to receive
    # arbitrary angles. Divide into a zeroed buffer and set those pixels to
    # the limit value explicitly.
    vertical = gray_x == 0
    ratio = np.zeros_like(gray_x)
    np.true_divide(gray_y, gray_x, out=ratio, where=~vertical)
    grad_angle = np.arctan(ratio)
    grad_angle[vertical] = np.sign(gray_y[vertical]) * (np.pi / 2)
    return grad_strength, grad_angle
def canny(grad_strength, grad_angle):
    """Thin the gradient image by non-maximum suppression.

    Each gradient angle is quantised into one of four directions. An interior
    pixel whose strength is at least 0.01 keeps its strength only if it is
    strictly stronger than both neighbours along its direction; every other
    pixel, including the one-pixel border, is zero in the result.

    :param grad_strength: 2-D array of gradient magnitudes
    :param grad_angle: 2-D array of gradient angles in ``[-pi/2, pi/2]``
    :return: array shaped like ``grad_strength`` holding the surviving
        strengths
    """
    # Angle preparation: map [-pi/2, pi/2] onto the bins 0..4 and fold bin 4
    # (same orientation as bin 0) back onto 0.
    direction = np.round(grad_angle / np.pi * 4) + 2
    direction = np.array(direction, dtype="uint8")
    direction[direction == 4] = 0

    # For every bin the two (dy, dx) neighbours the pixel is compared with.
    neighbours = {
        0: ((-1, 0), (1, 0)),
        1: ((-1, -1), (1, 1)),
        2: ((0, -1), (0, 1)),
        3: ((-1, 1), (1, -1)),
    }

    # Canny
    low_threshold = 0.01
    rows, cols = grad_strength.shape
    thinned = np.zeros_like(grad_strength)
    for y in range(1, rows - 1):
        for x in range(1, cols - 1):
            here = grad_strength[y, x]
            if here < low_threshold:
                continue
            pair = neighbours.get(int(direction[y, x]))
            if pair is None:
                continue
            (dy_a, dx_a), (dy_b, dx_b) = pair
            if grad_strength[y + dy_a, x + dx_a] < here and grad_strength[y + dy_b, x + dx_b] < here:
                thinned[y, x] = here
    return thinned
def hough_lines(canny, theta_res=5, width_res=5):
# Vote for straight lines through the edge pixels and pick the strongest ones.
#   canny     - 2-D edge image; every interior pixel > 0 casts votes
#   theta_res - angular step in degrees (180 // theta_res angles are tried)
#   width_res - distance step in pixels
# Returns (results, references):
#   results    - array of [angle in degrees, distance in pixels] per line
#   references - defaultdict mapping `distance_bin * theta_angle + angle_bin`
#                to the list of [y, x] edge pixels that voted for that cell
# NOTE(review): indentation is not preserved in this view; the nesting of the
# statements in the selection loop below is inferred - verify in the real file.
h, w = canny.shape
theta_angle = 180 // theta_res
# `mid` shifts the distance bins so that negative distances get a
# non-negative column index.
mid = int(np.round(np.sqrt((h//width_res)**2+(w//width_res)**2)))
hough = np.zeros((theta_angle, mid*2+2))
references = defaultdict(list)
for y in range(1, h-1):
for x in range(1, w-1):
if canny[y, x] > 0:
for theta in range(theta_angle):
t = theta * np.pi / theta_angle
# Normal-form distance of the line through (x, y) with normal angle t,
# expressed in bins and shifted by `mid`.
q = (x * np.cos(t) + y * np.sin(t))/width_res + mid
hough[theta, int(q)] += 1
references[int(q) * theta_angle + theta].append([y, x])
# Indices of the 100 accumulator cells with the most votes, strongest first.
lines = np.unravel_index(np.argsort(hough.ravel())[-100:][::-1], hough.shape)
lines = np.array(lines).T
results = []
# Half-size (in bins) of the neighbourhood that is cleared around a chosen cell.
COVER_Y = 10
COVER_X = 15
for line in lines:
# Cells already cleared by a stronger neighbour hold 0 and are skipped.
if hough[line[0], line[1]] > 0:
results.append([line[0]*theta_res, (line[1]-mid)*width_res])
hough[max(0, line[0]-COVER_Y):line[0]+COVER_Y, max(0, line[1]-COVER_X):line[1]+COVER_X] = 0
# When the neighbourhood sticks out over an accumulator border, the part
# that sticks out is cleared on the opposite side as well.
y1, x1, y2, x2 = None, None, None, None
if line[0]-COVER_Y < 0:
y1 = hough.shape[0]+line[0]-COVER_Y
if line[1]-COVER_X < 0:
x1 = hough.shape[1]+line[1]-COVER_X
if line[0]+COVER_Y > hough.shape[0]:
y2 = line[0]+COVER_Y-hough.shape[0]
if line[1]+COVER_X > hough.shape[1]:
x2 = line[1]+COVER_X-hough.shape[1]
if any(x is not None for x in [y1, x1, y2, x2]):
ty1 = y1 if y1 is not None else (0 if y2 is not None else line[0]-COVER_Y)
tx1 = x1 if x1 is not None else (0 if x2 is not None else line[1]-COVER_X)
ty2 = y2 if y2 is not None else (hough.shape[0] if y1 is not None else line[0]+COVER_Y)
tx2 = x2 if x2 is not None else (hough.shape[1] if x1 is not None else line[1]+COVER_X)
hough[ty1:ty2, tx1:tx2] = 0
# Stop as soon as more than five lines have been collected.
if len(results) > 5:
break
results = np.array(results)
return results, references
def draw_hough_lines(image, scale, results, references, shape, theta_res=5, width_res=5):
    """Show the detected Hough lines on a copy of the image.

    The edge pixels that voted for each line are stamped in green
    (``[0, 255, 0]``) and the line itself is traced in red
    (``[255, 0, 0]``). The result is displayed with matplotlib; nothing is
    returned and ``image`` itself is not modified.

    :param image: full-resolution RGB image
    :param scale: factor from edge-image to full-resolution coordinates
    :param results: ``[angle in degrees, distance]`` rows from ``hough_lines``
    :param references: vote bookkeeping from ``hough_lines``
    :param shape: ``(rows, cols)`` of the edge image
    :param theta_res: angular step used by ``hough_lines``
    :param width_res: distance step used by ``hough_lines``
    """
    rows, cols = shape
    n_theta = 180 // theta_res
    offset = int(np.round(np.sqrt((rows // width_res) ** 2 + (cols // width_res) ** 2)))
    canvas = np.copy(image)
    voter_half = 10
    trace_half = 5

    def stamp(cy, cx, half, colour):
        # Paint a square around (cy, cx); only the lower bounds are clamped.
        canvas[max(0, cy - half):cy + half, max(0, cx - half):cx + half] = np.array(colour)

    for result in results:
        key = (result[1] / width_res + offset) * n_theta + result[0] // theta_res
        for ref in references[key]:
            stamp(int(scale * ref[0]), int(scale * ref[1]), voter_half, [0, 255, 0])

        if result[0] == 0:
            # Angle 0 means a vertical line at a fixed x position.
            column = int(result[1] * scale)
            for row in range(image.shape[0]):
                stamp(row, column, trace_half, [255, 0, 0])
            continue

        angle = (90 - result[0]) / 180 * np.pi
        m = np.sin(angle) / np.cos(angle)
        n = result[1] / np.cos(angle) * scale
        for column in range(image.shape[1]):
            row = int(n - column * m)
            if 0 < row < image.shape[0]:
                stamp(row, column, trace_half, [255, 0, 0])

    plt.imshow(canvas)
    plt.show()
def convert_to_lines(scale, results, references, shape, theta_res=5, width_res=5):
    """Convert Hough results into ``Line`` objects in full-resolution pixels.

    :param scale: factor from edge-image to full-resolution coordinates
    :param results: ``[angle in degrees, distance]`` rows from ``hough_lines``
    :param references: vote bookkeeping from ``hough_lines``
    :param shape: ``(rows, cols)`` of the edge image
    :param theta_res: angular step used by ``hough_lines``
    :param width_res: distance step used by ``hough_lines``
    :return: list of ``Line``; an angle of 0 becomes a vertical line
        (``slope=None``, ``intercept`` = x position), every other angle a line
        ``y = slope * x + intercept``. ``points`` holds the scaled ``[y, x]``
        pixels that voted for the line.
    """
    rows, cols = shape
    n_theta = 180 // theta_res
    offset = int(np.round(np.sqrt((rows // width_res) ** 2 + (cols // width_res) ** 2)))

    converted = []
    for result in results:
        theta_deg, distance = result[0], result[1]
        key = (distance / width_res + offset) * n_theta + theta_deg // theta_res
        points = [[int(scale * ref[0]), int(scale * ref[1])] for ref in references[key]]

        if theta_deg == 0:
            converted.append(Line(intercept=int(distance * scale), slope=None, points=points))
            continue

        angle = (90 - theta_deg) / 180 * np.pi
        slope = np.sin(angle) / -np.cos(angle)
        intercept = distance / np.cos(angle) * scale
        converted.append(Line(intercept=intercept, slope=slope, points=points))
    return converted
def split_segments(lines, image_shape):
    """Record, for every line, where it crosses the other lines and the image border.

    Each crossing inside the image is appended to the ``splits`` of both
    lines as ``(y, x, index_of_other_line)``. Two border points per line are
    added with ``None`` as third element. Finally the splits of each line are
    sorted along the line: by ``(x, y)`` for sloped lines and by ``(y, x)``
    for vertical ones.

    :param lines: objects with ``slope`` (``None`` = vertical), ``intercept``
        and a ``splits`` list; they are modified in place
    :param image_shape: image shape; index 0 is the height, index 1 the width
    :return: the same ``lines`` list
    """
    height, width = image_shape[0], image_shape[1]

    def connect(first, second, first_idx, second_idx, y, x):
        # Store the crossing on both lines, each pointing at its partner.
        node = (int(y), int(x))
        first.splits.append(node + (second_idx,))
        second.splits.append(node + (first_idx,))

    for i, current in enumerate(lines):
        for j in range(i + 1, len(lines)):
            other = lines[j]
            if current.slope == other.slope:
                # Parallel lines never cross.
                continue
            if current.slope is None:
                x = current.intercept
                y = other.intercept + other.slope * x
                inside = 0 < y < height
            elif other.slope is None:
                x = other.intercept
                y = current.intercept + current.slope * x
                inside = 0 < y < height
            else:
                x = (other.intercept - current.intercept) / (current.slope - other.slope)
                y = current.intercept + current.slope * x
                inside = 0 < x < width and 0 < y < height
            if inside:
                connect(current, other, i, j, y, x)

        # Border points of the current line.
        if current.slope is None:
            column = int(current.intercept)
            current.splits.append((0, column, None))
            current.splits.append((height, column, None))
        elif current.slope == 0:
            row = int(current.intercept)
            current.splits.append((row, 0, None))
            current.splits.append((row, width, None))
        else:
            for unclamped in (current.intercept, current.intercept + current.slope * width):
                y = min(max(0, unclamped), height)
                x = (y - current.intercept) / current.slope
                current.splits.append((int(y), int(x), None))

        if current.slope is None:
            current.splits = sorted(current.splits, key=lambda s: (s[0], s[1]))
        else:
            current.splits = sorted(current.splits, key=lambda s: (s[1], s[0]))
    return lines
def find_important_segments(lines, scale):
# Reduce every line to the stretch that is actually supported by edge pixels.
#   lines - Line objects with sorted `splits` and supporting `points`
#   scale - factor between edge-image and full-resolution coordinates
# For each line the supporting points are counted per segment (the stretch
# between two consecutive splits). The splits are cut down to the range from
# the first to the last segment holding more than NOT_RELEVANT_THRESHOLD of
# the counted points, widened by adjacent fragments shorter than
# ADD_TINY_FRAGMENTS (length divided by `scale`). Afterwards crossings whose
# partner line no longer lists the reverse crossing are removed.
# Returns the same `lines` list; the Line objects are modified in place.
# NOTE(review): indentation is not preserved in this view; the nesting of the
# statements below is inferred - verify against the real file.
NOT_RELEVANT_THRESHOLD = 0.10
ADD_TINY_FRAGMENTS = 30
for line in lines:
# One counter per segment between consecutive splits.
counts = np.zeros((len(line.splits)-1, ))
for point in line.points:
if line.slope is None:
# Vertical line: position along the line is the point's first entry.
x = point[0]
else:
# x coordinate of the point's perpendicular projection onto
# y = m * x + n.
a, b = point[1], point[0]
m, n = line.slope, line.intercept
x = (a+b*m-m*n) / (m**2 + 1)
for i in range(len(line.splits)-1):
if line.slope is not None:
lower = line.splits[i][1]
upper = line.splits[i+1][1]
else:
lower = line.splits[i][0]
upper = line.splits[i+1][0]
if lower <= x < upper:
counts[i] += 1
break
# Turn counts into fractions. If nothing was counted this divides by zero
# and yields NaN, which fails the comparison below and clears the splits.
counts = counts / np.sum(counts)
start = None
end = None
for idx, count in enumerate(counts):
if count > NOT_RELEVANT_THRESHOLD:
if start is None:
start = idx
end = idx + 1
if start is None:
line.splits = []
else:
# Widen the range over short neighbouring fragments on both sides.
while start > 0 and np.sqrt((line.splits[start][0]-line.splits[start-1][0])**2 + (line.splits[start][1]-line.splits[start-1][1])**2) / scale < ADD_TINY_FRAGMENTS:
start -= 1
while end < len(line.splits)-1 and np.sqrt((line.splits[end][0]-line.splits[end+1][0])**2 + (line.splits[end][1]-line.splits[end+1][1])**2) / scale < ADD_TINY_FRAGMENTS:
end += 1
line.splits = line.splits[start:end+1]
# check if reverse reference exits
for idx, line in enumerate(lines):
new_splits = []
for split in line.splits:
# Border points (no partner line) are always kept.
if split[2] is None:
new_splits.append(split)
continue
# Keep a crossing only if the partner line still lists this line.
for split2 in lines[split[2]].splits:
if split2[2] == idx:
new_splits.append(split)
break
line.splits = new_splits
return lines
def find_largest_rectangle(lines, image_shape):
# Search the line crossings for the four-sided polygon covering the largest
# share of the image.
#   lines       - Line objects whose `splits` hold (y, x, other_line_index)
#   image_shape - image shape; index 0 is the height, index 1 the width
# Returns (max_score, corners): the best area fraction found and the corner
# coordinates as an array of (y, x) rows, or None when no polygon scored above
# 0.15 or the best one failed the angle check. `max_score` is returned
# unchanged even when `corners` is reset to None.
# NOTE(review): indentation is not preserved in this view; the nesting of the
# statements below is inferred - verify against the real file.
def find_polygon(number, used, next, target, edges):
# Recursive walk from line to line along their crossings.
#   number - how many sides are still to be added
#   used   - indices of the lines already on the path
#   next   - index of the line to continue on (None = border point)
#   target - index of the line the last side has to lie on
#   edges  - corner points collected so far
max_score, corners = 0, None
if number < 2:
return max_score, corners
if next is None:
return max_score, corners
current = lines[next]
for neighbor in current.splits:
if next == target and number == 2:
# The polygon closes when this crossing leads back to the first line.
if neighbor[2] == used[0]:
e = np.array(edges[:] + [tuple(neighbor[:2])])
# Area of the four-cornered polygon from its two diagonals.
a = abs((e[0, 1] - e[2, 1])*(e[3, 0] - e[1, 0]) + (e[1, 1] - e[3, 1])*(e[0, 0] - e[2, 0]))/2
# a := "fraction of rectangle to overall image"
a = a/image_shape[0]/image_shape[1]
return a, edges[:] + [tuple(neighbor[:2])]
else:
continue
if neighbor[2] not in used:
res_score, res_corners = find_polygon(number - 1, used[:] + [next], neighbor[2], target, edges[:] + [tuple(neighbor[:2])])
if res_score > max_score:
max_score, corners = res_score, res_corners
return max_score, corners
max_score, corners = 0, None
# Try every pair of splits (s before e) on every line as the first side.
for lidx, line in enumerate(lines):
for idx, s in enumerate(line.splits[:-1]):
for e in line.splits[idx + 1:]:
res_score, res_corners = find_polygon(4, [lidx], e[2], s[2], [tuple(e[:2])])
# Only polygons covering more than 15 % of the image are accepted.
if res_score > max_score and res_score > 0.15:
max_score, corners = res_score, res_corners
if corners is not None:
corners = np.array(corners)
# check rectangle validity
l = corners.shape[0]
for i in range(corners.shape[0]):
a, b, c = i%l, (i+1)%l, (i+2)%l
v1 = corners[b, :] - corners[a, :]
v2 = corners[b, :] - corners[c, :]
# Angle at corner b; corners sharper than 0.8 rad reject the polygon.
phi = np.arccos(v1.dot(v2) / np.linalg.norm(v1) / np.linalg.norm(v2))
if phi < 0.8:
corners = None
break
return max_score, corners
def draw_rectangle(image, corners):
    """Show the polygon given by ``corners`` drawn in red on a copy of the image.

    Every edge between consecutive corners (the last one connects back to the
    first) is traced with 5000 stamped blocks. The result is displayed with
    matplotlib; nothing is returned and ``image`` itself is not modified.

    :param image: RGB image array
    :param corners: array of ``(y, x)`` corner rows
    """
    steps = 5000
    count = corners.shape[0]
    canvas = np.copy(image)
    for index in range(count):
        begin = corners[index % count]
        finish = corners[(index + 1) % count]
        for step in range(steps):
            x = int(begin[1] + (finish[1] - begin[1]) * step / steps)
            y = int(begin[0] + (finish[0] - begin[0]) * step / steps)
            canvas[max(0, y - 15):y + 10, max(0, x - 10):x + 10] = np.array([255, 0, 0])
    plt.imshow(canvas)
    plt.show()
def crop_image(image, corners):
    """Cut the quadrilateral ``corners`` out of ``image`` and straighten it.

    The corner list is rotated relative to the corner closest to the origin,
    the output size is taken from the averaged side lengths, and the region
    is resampled with PIL's QUAD transform (bicubic) before being rotated by
    90 degrees.

    :param image: image array accepted by ``Image.fromarray``
    :param corners: array of four ``(y, x)`` corner rows
    :return: the rectified crop as an array
    """
    nearest_origin = np.argmin(np.linalg.norm(corners, axis=1))
    ordered = np.roll(corners, -nearest_origin - 1, axis=0)

    ys, xs = ordered[:, 0], ordered[:, 1]
    h = int((ys[0] + ys[1] - ys[2] - ys[3]) / 2)
    w = int((xs[1] + xs[2] - xs[0] - xs[3]) / 2)

    # PIL expects the quad as a flat (x, y, x, y, ...) sequence.
    quad = ordered[:, ::-1].flatten()
    warped = Image.fromarray(image).transform((h, w), Image.QUAD, quad, Image.BICUBIC)
    return np.rot90(np.asarray(warped))

View File

@@ -0,0 +1,338 @@
from collections import defaultdict
from skimage.transform import resize
from scipy.ndimage import gaussian_filter
from matplotlib import pyplot as plt
from src.processing.imageprocessing import rgb2gray_value
from scipy import signal
import numpy as np
from PIL import Image
class Line:
    """A straight line ``y = slope * x + intercept`` with its supporting data.

    A ``slope`` of ``None`` marks a vertical line; ``intercept`` then holds
    its x position. ``points`` are the ``[y, x]`` pixels that voted for the
    line and ``splits`` collects ``(y, x, other_line_index)`` tuples.
    """

    def __init__(self, intercept=0, slope=0, points=None):
        self.intercept = intercept
        self.slope = slope
        # A fresh list per instance unless the caller supplies one.
        self.points = points if points is not None else []
        self.splits = []

    def __str__(self):
        return "m={!s};n={!s};split={!s}".format(self.slope, self.intercept, self.splits)
def cut_receipt(image):
image = image[:, :, :3]
gray = rgb2gray_value(image)
gray = resize(gray, (500, int(gray.shape[1] / gray.shape[0] * 500)), mode="reflect")
scale = image.shape[0] / gray.shape[0]
print(scale)
gray = gaussian_filter(gray, sigma=max(3, scale*2-8))
#gauss = np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]], dtype='float')/16
#gray = signal.convolve2d(gray, gauss, boundary='symm', mode='same')
sobely = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], dtype='float')
sobely = np.array([[0, 2, 0], [0, 0, 0], [0, -2, 0]], dtype='float')
gray_y = signal.convolve2d(gray, sobely, boundary='symm', mode='same')
sobelx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype='float')
sobelx = np.array([[0, 0, 0], [-2, 0, 2], [0, 0, 0]], dtype='float')
gray_x = signal.convolve2d(gray, sobelx, boundary='symm', mode='same')
grad_strength = np.sqrt(np.square(gray_y)+np.square(gray_x))
grad_angle = np.arctan(np.true_divide(gray_y, gray_x, where=gray_x != 0))
angles = np.copy(grad_angle)
grad_angle = np.round(grad_angle / np.pi * 4) + 2
grad_angle = np.array(grad_angle, dtype="uint8")
grad_angle[grad_angle == 4] = 0
h, w = grad_strength.shape
#plt.imshow(grad_strength, cmap="gray")
#plt.show()
# Canny
CANNY_MIN = 0.01
CANNY_MAX = 0.002
strenghts = np.zeros_like(grad_strength)
for y in range(1, h-1):
for x in range(1, w-1):
if grad_strength[y, x] < CANNY_MIN:
continue
if grad_angle[y, x] == 0:
if grad_strength[y-1, x] < grad_strength[y, x] and grad_strength[y+1, x] < grad_strength[y, x]:
strenghts[y, x] = grad_strength[y, x]
elif grad_angle[y, x] == 1:
if grad_strength[y-1, x-1] < grad_strength[y, x] and grad_strength[y+1, x+1] < grad_strength[y, x]:
strenghts[y, x] = grad_strength[y, x]
elif grad_angle[y, x] == 2:
if grad_strength[y, x-1] < grad_strength[y, x] and grad_strength[y, x+1] < grad_strength[y, x]:
strenghts[y, x] = grad_strength[y, x]
elif grad_angle[y, x] == 3:
if grad_strength[y-1, x+1] < grad_strength[y, x] and grad_strength[y+1, x-1] < grad_strength[y, x]:
strenghts[y, x] = grad_strength[y, x]
# Hough-Lines
theta_res = 5
width_res = 5
theta_angle = 180 // theta_res
mid = int(np.round(np.sqrt((h//width_res)**2+(w//width_res)**2)))
hough = np.zeros((theta_angle, mid*2+2))
references = defaultdict(list)
for y in range(1, h-1):
for x in range(1, w-1):
if strenghts[y, x] > 0:
for theta in range(theta_angle):
t = theta * np.pi / theta_angle
q = (x * np.cos(t) + y * np.sin(t))/width_res + mid
#print(theta*theta_res, y//width_res, x//width_res, q - mid, t)
hough[theta, int(q)] += 1#strenghts[y, x]
#print(theta, int(q), 175//theta_res, -120//width_res + mid + 1)
references[int(q) * theta_angle + theta].append([y, x])
lines = np.unravel_index(np.argsort(hough.ravel())[-100:][::-1], hough.shape)
lines = np.array(lines).T
results = []
COVER_Y = 10
COVER_X = 15
for line in lines:
if hough[line[0], line[1]] > 0:
results.append([line[0]*theta_res, (line[1]-mid)*width_res])
hough[max(0, line[0]-COVER_Y):line[0]+COVER_Y, max(0, line[1]-COVER_X):line[1]+COVER_X] = 0
y1, x1, y2, x2 = None, None, None, None
if line[0]-COVER_Y < 0:
y1 = hough.shape[0]+line[0]-COVER_Y
if line[1]-COVER_X < 0:
x1 = hough.shape[1]+line[1]-COVER_X
if line[0]+COVER_Y > hough.shape[0]:
y2 = line[0]+COVER_Y-hough.shape[0]
if line[1]+COVER_X > hough.shape[1]:
x2 = line[1]+COVER_X-hough.shape[1]
if any(x is not None for x in [y1, x1, y2, x2]):
ty1 = y1 if y1 is not None else (0 if y2 is not None else line[0]-COVER_Y)
tx1 = x1 if x1 is not None else (0 if x2 is not None else line[1]-COVER_X)
ty2 = y2 if y2 is not None else (hough.shape[0] if y1 is not None else line[0]+COVER_Y)
tx2 = x2 if x2 is not None else (hough.shape[1] if x1 is not None else line[1]+COVER_X)
hough[ty1:ty2, tx1:tx2] = 0
if len(results) > 5:
break
results = np.array(results)
#print(results)
# draw image (removable)
draw_image = np.copy(image)
RED_WIDTH = 10
GREEN_WIDTH = 5
for result in results:
refs = references[(result[1]/width_res+mid) * theta_angle + result[0]//theta_res]
for ref in refs:
xa = int(scale * ref[1])
ya = int(scale * ref[0])
draw_image[max(0, ya - RED_WIDTH):ya + RED_WIDTH, max(0, xa - RED_WIDTH):xa + RED_WIDTH] = np.array([0, 255, 0])
if result[0] == 0:
x = int(result[1] * scale)
for y in range(image.shape[0]):
draw_image[max(0, y-GREEN_WIDTH):y+GREEN_WIDTH, max(0, x-GREEN_WIDTH):x+GREEN_WIDTH] = np.array([255, 0, 0])
else:
angle = (90 - result[0]) / 180 * np.pi
m = np.sin(angle) / np.cos(angle)
n = result[1] / np.cos(angle) * scale
for x in range(image.shape[1]):
y = int(n - x * m)
if 0 < y < image.shape[0]:
draw_image[max(0, y - GREEN_WIDTH):y + GREEN_WIDTH, max(0, x - GREEN_WIDTH):x + GREEN_WIDTH] = np.array([255, 0, 0])
#plt.imshow(draw_image)
#plt.show()
# convert to original image pixel
scale = image.shape[0] / gray.shape[0]
lines = []
for result in results:
points = []
refs = references[(result[1]/width_res+mid) * theta_angle + result[0]//theta_res]
for ref in refs:
xa = int(scale * ref[1])
ya = int(scale * ref[0])
points.append([ya, xa])
if result[0] == 0:
x = int(result[1] * scale)
lines.append(Line(intercept=x, slope=None, points=points))
else:
angle = (90 - result[0]) / 180 * np.pi
m = np.sin(angle) / -np.cos(angle)
n = result[1] / np.cos(angle) * scale
lines.append(Line(intercept=n, slope=m, points=points))
# split segments
for idx1, line1 in enumerate(lines):
for idx2, line2 in enumerate(lines[idx1+1:], idx1+1):
if line1.slope == line2.slope:
continue
elif line1.slope is None:
x = line1.intercept
y = line2.intercept + line2.slope * line1.intercept
if 0 < y < image.shape[0]:
line1.splits.append((int(y), int(x), idx2))
line2.splits.append((int(y), int(x), idx1))
elif line2.slope is None:
x = line2.intercept
y = line1.intercept + line1.slope * line2.intercept
if 0 < y < image.shape[0]:
line1.splits.append((int(y), int(x), idx2))
line2.splits.append((int(y), int(x), idx1))
else:
x = (line2.intercept - line1.intercept) / (line1.slope - line2.slope)
y = line1.intercept + line1.slope * x
#print(x, y)
if 0 < x < image.shape[1] and 0 < y < image.shape[0]:
line1.splits.append((int(y), int(x), idx2))
line2.splits.append((int(y), int(x), idx1))
if line1.slope is None:
line1.splits.append((0, int(line1.intercept), None))
line1.splits.append((image.shape[0], int(line1.intercept), None))
elif line1.slope == 0:
line1.splits.append((int(line1.intercept), 0, None))
line1.splits.append((int(line1.intercept), image.shape[1], None))
else:
y = min(max(0, line1.intercept), image.shape[0])
x = (y - line1.intercept) / line1.slope
line1.splits.append((int(y), int(x), None))
y = min(max(0, line1.intercept+line1.slope*image.shape[1]), image.shape[0])
x = (y - line1.intercept) / line1.slope
line1.splits.append((int(y), int(x), None))
if line1.slope is not None:
line1.splits = sorted(line1.splits, key=lambda x: (x[1], x[0]))
else:
line1.splits = sorted(line1.splits, key=lambda x: (x[0], x[1]))
#print(line1)
# find important segments
for line in lines:
counts = np.zeros((len(line.splits)-1, ))
for point in line.points:
if line.slope is None:
x = point[0]
else:
a, b = point[1], point[0]
m, n = line.slope, line.intercept
x = (a+b*m-m*n) / (m**2 + 1)
for i in range(len(line.splits)-1):
if line.slope is not None:
lower = line.splits[i][1]
upper = line.splits[i+1][1]
else:
lower = line.splits[i][0]
upper = line.splits[i+1][0]
if lower <= x < upper:
counts[i] += 1
break
#print(counts)
#print("before", line)
counts = counts / np.sum(counts)
start = None
end = None
for idx, count in enumerate(counts):
if count > 0.10:
if start is None:
start = idx
end = idx + 1
if start is None:
line.splits = []
else:
while start > 0 and np.sqrt((line.splits[start][0]-line.splits[start-1][0])**2 + (line.splits[start][1]-line.splits[start-1][1])**2) / scale < 30:
#print("start", np.sqrt((line.splits[start][0]-line.splits[start-1][0])**2 + (line.splits[start][1]-line.splits[start-1][1])**2) / scale)
start -= 1
while end < len(line.splits)-1 and np.sqrt((line.splits[end][0]-line.splits[end+1][0])**2 + (line.splits[end][1]-line.splits[end+1][1])**2) / scale < 30:
#print("end", np.sqrt((line.splits[end][0]-line.splits[end+1][0])**2 + (line.splits[end][1]-line.splits[end+1][1])**2) / scale)
end += 1
line.splits = line.splits[start:end+1]
#print("after", line)
for idx, line in enumerate(lines):
new_splits = []
for split in line.splits:
if split[2] is None:
new_splits.append(split)
continue
for split2 in lines[split[2]].splits:
if split2[2] == idx:
new_splits.append(split)
break
line.splits = new_splits
#print("after2", line)
print()
# find largest rectangle
# Recursive search for a closed four-corner polygon along intersecting lines.
#
# Reads `lines` and `image` from the enclosing scope. Each entry of
# `lines[i].splits` is indexed as (y, x, other_line_index); the third item is
# None for points that are not an intersection with another line.
#
#   number -- remaining search depth; anything below 2 ends the search
#   used   -- indices of the lines already walked; used[0] is the line the
#             polygon has to close on
#   next   -- index of the line to walk now (None means a dead end)
#   target -- index of the line that must be reached when number == 2
#   edges  -- (y, x) corner tuples collected so far
#
# Returns (score, corners): score is the area spanned by the four collected
# corners divided by the image area, corners is the list of (y, x) tuples.
# (0, None) is returned when no closing path exists.
#
# NOTE(review): indentation is not visible in this view; the comments below
# presume `else: continue` belongs to the `neighbor[2] == used[0]` test and
# that the recursion sits inside the `for` loop -- confirm against the file.
def find_polygon(number, used, next, target, edges):
#print(number, used, next, target, edges)
max_score, corners = 0, None
# depth exhausted: nothing can be closed from here
if number < 2:
return max_score, corners
# the previous split point was not an intersection, so there is no line to follow
if next is None:
return max_score, corners
current = lines[next]
for neighbor in current.splits:
#print(number, "--", neighbor)
# last hop: we are on the target line and may only step back onto the starting line
if next == target and number == 2:
if neighbor[2] == used[0]:
e = np.array(edges[:] + [tuple(neighbor[:2])])
#print(used[:] + [next], target, edges[:] + [tuple(neighbor[:2])])
# quadrilateral area from its two diagonals (shoelace formula); indexes exactly four points
a = abs((e[0, 1] - e[2, 1])*(e[3, 0] - e[1, 0]) + (e[1, 1] - e[3, 1])*(e[0, 0] - e[2, 0]))/2
# normalise by the image area (height * width)
a = a/image.shape[0]/image.shape[1]
#print(a)
# the first closing neighbor found is returned immediately
return a, edges[:] + [tuple(neighbor[:2])]
else:
continue
# otherwise follow the crossing line, unless it has been walked already
if neighbor[2] not in used:
res_score, res_corners = find_polygon(number - 1, used[:] + [next], neighbor[2], target, edges[:] + [tuple(neighbor[:2])])
# keep the best-scoring polygon among all branches
if res_score > max_score:
max_score, corners = res_score, res_corners
return max_score, corners
max_score, corners = 0, None
for lidx, line in enumerate(lines):
for idx, s in enumerate(line.splits[:-1]):
for e in line.splits[idx + 1:]:
# print(line.name, (s[0], s[1], None if s[2] is None else lines[s[2]].name), (e[0], e[1], None if e[2] is None else lines[e[2]].name))
res_score, res_corners = find_polygon(4, [lidx], e[2], s[2], [tuple(e[:2])])
if res_score > max_score and res_score > 0.15:
max_score, corners = res_score, res_corners
#print(max_score, corners)
#print(image.shape)
if corners is not None:
corners = np.array(corners)
# check rectangle validity
l = corners.shape[0]
for i in range(corners.shape[0]):
a, b, c = i%l, (i+1)%l, (i+2)%l
v1 = corners[b, :] - corners[a, :]
v2 = corners[b, :] - corners[c, :]
#print(v1, v2, np.linalg.norm(v1))
phi = np.arccos(v1.dot(v2) / np.linalg.norm(v1) / np.linalg.norm(v2))
#print(phi, corners[b])
if phi < 0.8:
corners = None
break
if corners is not None:
# draw image (removable)
draw_image = np.copy(image)
for i in range(corners.shape[0]):
a, b = corners[i%l], corners[(i+1)%l]
for i in range(5000):
x = int(a[1] + (b[1] - a[1]) * i / 5000)
y = int(a[0] + (b[0] - a[0]) * i / 5000)
draw_image[max(0, y - 15):y + 10, max(0, x - 10):x + 10] = np.array([255, 0, 0])
#plt.imshow(draw_image)
#plt.show()
# crop image
topleft = np.argmin(np.linalg.norm(corners, axis=1))
corners = np.roll(corners, -topleft-1, axis=0)
print(corners)
h = int((corners[0, 0] + corners[1, 0] - corners[2, 0] - corners[3, 0]) / 2)
w = int((corners[1, 1] + corners[2, 1] - corners[0, 1] - corners[3, 1]) / 2)
pb = np.copy(corners)[:, ::-1].reshape((8,))
img = Image.fromarray(image)
convert = np.rot90(np.asarray(img.transform((h, w), Image.QUAD, pb, Image.BICUBIC)))
plt.imshow(convert)
plt.show()
return convert
else:
return image

0
src/utils/__init__.py Normal file
View File

View File

@@ -0,0 +1,93 @@
from matplotlib.colors import LinearSegmentedColormap
import colorsys
import numpy as np
def rand_cmap(nlabels, type='bright', first_color_black=True, last_color_black=False, verbose=True):
    """
    Creates a random colormap to be used together with matplotlib. Useful for segmentation tasks

    :param nlabels: Number of labels (size of colormap)
    :param type: 'bright' for strong colors, 'hard' for strong colors with a higher minimum
                 saturation (0.7 instead of 0.2), 'soft' for pastel colors
    :param first_color_black: Option to use first color as black, True or False
    :param last_color_black: Option to use last color as black, True or False
    :param verbose: Prints the number of labels and draws the colormap as a horizontal colorbar
                    figure. True or False
    :return: colormap for matplotlib, or None (after printing a hint) if type is not one of
             'bright', 'hard' or 'soft'
    """
    # Lower bound of the saturation channel for the HSV-based types; hue is
    # drawn from [0, 1) and value from [0.9, 1) for both of them.
    hsv_saturation_floor = {'bright': 0.2, 'hard': 0.7}

    if type not in ('bright', 'soft', 'hard'):
        print('Please choose "hard", "bright" or "soft" for type')
        return None

    if verbose:
        print('Number of labels: ' + str(nlabels))

    if type == 'soft':
        # Soft pastel colors: sample RGB directly from a narrow, light range
        low = 0.6
        high = 0.95
        randRGBcolors = [(np.random.uniform(low=low, high=high),
                          np.random.uniform(low=low, high=high),
                          np.random.uniform(low=low, high=high)) for _ in range(nlabels)]
    else:
        # Bright / hard colors: sample in HSV, then convert to RGB
        saturation_low = hsv_saturation_floor[type]
        randHSVcolors = [(np.random.uniform(low=0.0, high=1),
                          np.random.uniform(low=saturation_low, high=1),
                          np.random.uniform(low=0.9, high=1)) for _ in range(nlabels)]
        randRGBcolors = [colorsys.hsv_to_rgb(hue, saturation, value)
                         for hue, saturation, value in randHSVcolors]

    if first_color_black:
        randRGBcolors[0] = [0, 0, 0]

    if last_color_black:
        randRGBcolors[-1] = [0, 0, 0]

    random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels)

    # Display colorbar
    if verbose:
        # Imported lazily so the plotting stack is only touched when a preview is requested
        from matplotlib import colors, colorbar
        from matplotlib import pyplot as plt
        _, ax = plt.subplots(1, 1, figsize=(15, 0.5))

        bounds = np.linspace(0, nlabels, nlabels + 1)
        norm = colors.BoundaryNorm(bounds, nlabels)

        colorbar.ColorbarBase(ax, cmap=random_colormap, norm=norm, spacing='proportional', ticks=None,
                              boundaries=bounds, format='%1i', orientation=u'horizontal')

    return random_colormap
def list_cmap(array):
    """
    Creates a matplotlib colormap from an explicit sequence of colors.

    :param array: Sequence of colors accepted by LinearSegmentedColormap.from_list; a numpy array
                  with one color per row works, and so does a plain list or tuple of colors
    :return: colormap for matplotlib with one entry per item of array
    """
    # len() equals shape[0] for arrays and additionally supports plain sequences
    return LinearSegmentedColormap.from_list('new_map', array, N=len(array))