Skip to content

Commit

Permalink
segment-region: clip polygons to parent...
Browse files Browse the repository at this point in the history
(before annotating polygon coordinates, ensure they fit
 into their parent if it exists, or the page frame otherwise)
  • Loading branch information
bertsky committed May 26, 2020
1 parent 9d2253e commit 9e5f8e0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Fixed:

* segment-region: ensure polygons are within page/Border

Changed:

* segment-region: in `sparse_text` mode, also add text lines
Expand Down
28 changes: 28 additions & 0 deletions ocrd_tesserocr/segment_region.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import absolute_import

import os.path
from shapely.geometry import Polygon
from tesserocr import (
PyTessBaseAPI,
PSM, RIL, PT
Expand All @@ -20,6 +21,7 @@
MetadataItemType,
LabelsType, LabelType,
CoordsType, AlternativeImageType,
PageType,
OrderedGroupType,
ReadingOrderType,
RegionRefIndexedType,
Expand Down Expand Up @@ -215,6 +217,7 @@ def _process_page(self, it, page, page_image, page_coords, page_id):
else:
polygon = polygon_from_x0y0x1y1(bbox)
polygon = coordinates_for_segment(polygon, page_image, page_coords)
polygon = polygon_for_parent(polygon, page)
points = points_from_polygon(polygon)
coords = CoordsType(points=points)
# if xywh['w'] < 30 or xywh['h'] < 30:
Expand Down Expand Up @@ -299,3 +302,28 @@ def _process_page(self, it, page, page_image, page_coords, page_id):
not og.get_UnorderedGroupIndexed()):
# schema forbids empty OrderedGroup
ro.set_OrderedGroup(None)

def polygon_for_parent(polygon, parent):
    """Clip polygon to the outline of its parent.

    (Should be moved to ocrd_utils.coordinates_for_segment.)

    Arguments:
        polygon: sequence of x/y coordinate pairs outlining the
            would-be segment.
        parent: either a ``PageType`` or any other object offering
            ``get_Coords().points``. For a ``PageType``, its Border is
            used if one is annotated, otherwise the rectangle spanned
            by its ``imageWidth`` and ``imageHeight``.

    Returns:
        ``polygon`` itself (unchanged) if it already lies within the
        parent; otherwise the exterior ring of the clipped shape with
        its closing point dropped.

    Raises:
        Exception: if polygon and parent have no area in common.
    """
    childp = Polygon(polygon)
    if isinstance(parent, PageType):
        border = parent.get_Border()
        if border:
            parentp = Polygon(polygon_from_points(border.get_Coords().points))
        else:
            # no Border annotated: fall back to the full page frame
            width = parent.get_imageWidth()
            height = parent.get_imageHeight()
            parentp = Polygon([[0, 0], [0, height],
                               [width, height], [width, 0]])
    else:
        parentp = Polygon(polygon_from_points(parent.get_Coords().points))
    if childp.within(parentp):
        return polygon
    interp = childp.intersection(parentp)
    # A child that merely touches the parent's boundary intersects in a
    # Point or LineString: not empty, but without area and without an
    # exterior ring, so it must be rejected like an empty intersection.
    if interp.is_empty or interp.area == 0.0:
        # FIXME: we need a better strategy against this
        raise Exception("intersection of would-be segment with parent is empty")
    if interp.geom_type == 'GeometryCollection':
        # heterogeneous result: drop the zero-area parts (Point, LineString)
        # and merge what remains (non-empty, since the total area is > 0)
        areal = [geom for geom in interp.geoms if geom.area > 0]
        interp = areal[0]
        for geom in areal[1:]:
            interp = interp.union(geom)
    if interp.geom_type == 'MultiPolygon':
        # several disjoint pieces: connect them via their convex hull
        interp = interp.convex_hull
    return interp.exterior.coords[:-1]  # keep open

0 comments on commit 9e5f8e0

Please sign in to comment.