From 684f38f88d39d6fcba672c73da36bca2ce13efbd Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 10 Oct 2024 10:31:22 +0200 Subject: [PATCH 1/9] Improve reconstruction --- doctr/io/elements.py | 12 +- doctr/utils/reconstitution.py | 183 ++++++++++++++-------- tests/common/test_utils_reconstitution.py | 51 +++++- 3 files changed, 177 insertions(+), 69 deletions(-) diff --git a/doctr/io/elements.py b/doctr/io/elements.py index b27ecb35eb..2b4d0b0581 100644 --- a/doctr/io/elements.py +++ b/doctr/io/elements.py @@ -310,6 +310,10 @@ def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, ** def synthesize(self, **kwargs) -> np.ndarray: """Synthesize the page from the predictions + Args: + ---- + **kwargs: keyword arguments passed to the `synthesize_page` method + Returns ------- synthesized page @@ -493,7 +497,7 @@ def synthesize(self, **kwargs) -> np.ndarray: Args: ---- - **kwargs: keyword arguments passed to the matplotlib.pyplot.show method + **kwargs: keyword arguments passed to the `synthesize_kie_page` method Returns: ------- @@ -603,11 +607,15 @@ def show(self, **kwargs) -> None: def synthesize(self, **kwargs) -> List[np.ndarray]: """Synthesize all pages from their predictions + Args: + ---- + **kwargs: keyword arguments passed to the `Page.synthesize` method + Returns ------- list of synthesized pages """ - return [page.synthesize() for page in self.pages] + return [page.synthesize(**kwargs) for page in self.pages] def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]: """Export the document as XML (hOCR-format) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index 82ae20cdd0..7409799241 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -2,6 +2,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. +import logging from typing import Any, Dict, Optional import numpy as np @@ -13,10 +14,90 @@ __all__ = ["synthesize_page", "synthesize_kie_page"] +def _synthesize( + response: Image.Image, + entry: Dict[str, Any], + w: int, + h: int, + draw_proba: bool = False, + font_family: Optional[str] = None, + smoothing_factor: float = 0.95, + min_font_size: int = 8, + max_font_size: int = 50, +) -> Image.Image: + if len(entry["geometry"]) == 2: + (xmin, ymin), (xmax, ymax) = entry["geometry"] + polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)] + else: + polygon = entry["geometry"] + + # Calculate the bounding box of the word + x_coords, y_coords = zip(*polygon) + xmin, ymin, xmax, ymax = ( + int(round(w * min(x_coords))), + int(round(h * min(y_coords))), + int(round(w * max(x_coords))), + int(round(h * max(y_coords))), + ) + word_width = xmax - xmin + word_height = ymax - ymin + + word_text = entry["value"] + # Find the optimal font size + try: + font_size = min(word_height, max_font_size) + font = get_font(font_family, font_size) + text_width, text_height = font.getbbox(word_text)[2:4] + + while (text_width > word_width or text_height > word_height) and font_size > min_font_size: + font_size = max(int(font_size * smoothing_factor), min_font_size) + font = get_font(font_family, font_size) + text_width, text_height = font.getbbox(word_text)[2:4] + except ValueError: + font = get_font(font_family, min_font_size) + text_width, text_height = font.getbbox(word_text)[2:4] + + # Calculate centering offsets + x_offset = (word_width - text_width) // 2 + y_offset = (word_height - text_height) // 2 + + # Create a mask for the word + mask = Image.new("L", (w, h), 0) + ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255) + + # Draw the word text + d = ImageDraw.Draw(response) + try: + d.text((xmin + x_offset, ymin + y_offset), word_text, font=font, fill=(0, 0, 0), anchor="lt") + except UnicodeEncodeError: + d.text((xmin + x_offset, ymin + y_offset), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") + + if draw_proba: + p = int(255 * entry["confidence"]) + color = (255 - p, 0, p) # Red to blue gradient based on probability + d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2) + + prob_font = get_font(font_family, 20) + prob_text = f"{entry['confidence']:.2f}" + prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4] + + # Position the probability slightly above the bounding box + prob_x_offset = (word_width - prob_text_width) // 2 + prob_y_offset = ymin - prob_text_height - 2 + prob_y_offset = max(0, prob_y_offset) + + d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt") + + return response + + def synthesize_page( page: Dict[str, Any], draw_proba: bool = False, font_family: Optional[str] = None, + smoothing_factor: float = 0.95, + min_font_size: int = 8, + max_font_size: int = 50, ) -> np.ndarray: """Draw a the content of the element page (OCR response) on a blank page. @@ -24,8 +105,10 @@ def synthesize_page( ---- page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 - font_size: size of the font, default font = 13 font_family: family of the font + smoothing_factor: factor to smooth the font size + min_font_size: minimum font size + max_font_size: maximum font size Returns: ------- @@ -33,41 +116,28 @@ def synthesize_page( """ # Draw template h, w = page["dimensions"] - response = 255 * np.ones((h, w, 3), dtype=np.int32) + response = Image.new("RGB", (w, h), color=(255, 255, 255)) + + _warned = False - # Draw each word for block in page["blocks"]: for line in block["lines"]: for word in line["words"]: - # Get absolute word geometry - (xmin, ymin), (xmax, ymax) = word["geometry"] - xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) - ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) - - # White drawing context adapted to font size, 0.75 factor to convert pts --> pix - font = get_font(font_family, int(0.75 * (ymax - ymin))) - img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255)) - d = ImageDraw.Draw(img) - # Draw in black the value of the word - try: - d.text((0, 0), word["value"], font=font, fill=(0, 0, 0)) - except UnicodeEncodeError: - # When character cannot be encoded, use its anyascii version - d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0)) - - # Colorize if draw_proba - if draw_proba: - p = int(255 * word["confidence"]) - mask = np.where(np.array(img) == 0, 1, 0) - proba: np.ndarray = np.array([255 - p, 0, p]) - color = mask * proba[np.newaxis, np.newaxis, :] - white_mask = 255 * (1 - mask) - img = color + white_mask - - # Write to response page - response[ymin:ymax, xmin:xmax, :] = np.array(img) - - return response + if len(word["geometry"]) == 4 and not _warned: + logging.warning("Polygons with larger rotations will lead to inaccurate rendering") + _warned = True + response = _synthesize( + response=response, + entry=word, + w=w, + h=h, + draw_proba=draw_proba, + font_family=font_family, + smoothing_factor=smoothing_factor, + min_font_size=min_font_size, + max_font_size=max_font_size, + ) + return np.array(response, dtype=np.uint8) def synthesize_kie_page( @@ -81,8 +151,10 @@ def synthesize_kie_page( ---- page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 - font_size: size of the font, default font = 13 font_family: family of the font + smoothing_factor: factor to smooth the font size + min_font_size: minimum font size + max_font_size: maximum font size Returns: ------- @@ -90,37 +162,22 @@ def synthesize_kie_page( """ # Draw template h, w = page["dimensions"] - response = 255 * np.ones((h, w, 3), dtype=np.int32) + response = Image.new("RGB", (w, h), color=(255, 255, 255)) + + _warned = False # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: - # Get aboslute word geometry - (xmin, ymin), (xmax, ymax) = prediction["geometry"] - xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) - ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) - - # White drawing context adapted to font size, 0.75 factor to convert pts --> pix - font = get_font(font_family, int(0.75 * (ymax - ymin))) - img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255)) - d = ImageDraw.Draw(img) - # Draw in black the value of the word - try: - d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0)) - except UnicodeEncodeError: - # When character cannot be encoded, use its anyascii version - d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0)) - - # Colorize if draw_proba - if draw_proba: - p = int(255 * prediction["confidence"]) - mask = np.where(np.array(img) == 0, 1, 0) - proba: np.ndarray = np.array([255 - p, 0, p]) - color = mask * proba[np.newaxis, np.newaxis, :] - white_mask = 255 * (1 - mask) - img = color + white_mask - - # Write to response page - response[ymin:ymax, xmin:xmax, :] = np.array(img) - - return response + if len(prediction["geometry"]) == 4 and not _warned: + logging.warning("Polygons with larger rotations will lead to inaccurate rendering") + _warned = True + response = _synthesize( + response=response, + entry=prediction, + w=w, + h=h, + draw_proba=draw_proba, + font_family=font_family, + ) + return np.array(response, dtype=np.uint8) diff --git a/tests/common/test_utils_reconstitution.py b/tests/common/test_utils_reconstitution.py index 3b70e67070..518308051b 100644 --- a/tests/common/test_utils_reconstitution.py +++ b/tests/common/test_utils_reconstitution.py @@ -1,12 +1,55 @@ import numpy as np -from test_io_elements import _mock_pages +from test_io_elements import _mock_kie_pages, _mock_pages from doctr.utils import reconstitution def test_synthesize_page(): pages = _mock_pages() - reconstitution.synthesize_page(pages[0].export(), draw_proba=False) - render = reconstitution.synthesize_page(pages[0].export(), draw_proba=True) + # Test without probability rendering + render_no_proba = reconstitution.synthesize_page(pages[0].export(), draw_proba=False) + assert isinstance(render_no_proba, np.ndarray) + assert render_no_proba.shape == (*pages[0].dimensions, 3) + + # Test with probability rendering + render_with_proba = reconstitution.synthesize_page(pages[0].export(), draw_proba=True) + assert isinstance(render_with_proba, np.ndarray) + assert render_with_proba.shape == (*pages[0].dimensions, 3) + + +def test_synthesize_kie_page(): + pages = _mock_kie_pages() + # Test without probability rendering + render_no_proba = reconstitution.synthesize_kie_page(pages[0].export(), draw_proba=False) + assert isinstance(render_no_proba, np.ndarray) + assert render_no_proba.shape == (*pages[0].dimensions, 3) + + # Test with probability rendering + render_with_proba = reconstitution.synthesize_kie_page(pages[0].export(), draw_proba=True) + assert isinstance(render_with_proba, np.ndarray) + assert render_with_proba.shape == (*pages[0].dimensions, 3) + + +def test_synthesize_with_edge_cases(): + page = { + "dimensions": (1000, 1000), + "blocks": [ + { + "lines": [ + { + "words": [ + {"value": "Test", "geometry": [(0, 0), (1, 0), (1, 1), (0, 1)], "confidence": 1.0}, + { + "value": "Overflow", + "geometry": [(0.9, 0.9), (1.1, 0.9), (1.1, 1.1), (0.9, 1.1)], + "confidence": 0.5, + }, + ] + } + ] + } + ], + } + render = reconstitution.synthesize_page(page, draw_proba=True) assert isinstance(render, np.ndarray) - assert render.shape == (*pages[0].dimensions, 3) + assert render.shape == (1000, 1000, 3) From e77293c0bc076de990f6977d61f966e47851a998 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 10 Oct 2024 10:37:44 +0200 Subject: [PATCH 2/9] Improve reconstruction --- doctr/utils/reconstitution.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index 7409799241..3a551dd13e 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -123,7 +123,7 @@ def synthesize_page( for block in page["blocks"]: for line in block["lines"]: for word in line["words"]: - if len(word["geometry"]) == 4 and not _warned: + if len(word["geometry"]) == 4 and not _warned: # pragma: no cover logging.warning("Polygons with larger rotations will lead to inaccurate rendering") _warned = True response = _synthesize( @@ -169,7 +169,7 @@ def synthesize_kie_page( # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: - if len(prediction["geometry"]) == 4 and not _warned: + if len(prediction["geometry"]) == 4 and not _warned: # pragma: no cover logging.warning("Polygons with larger rotations will lead to inaccurate rendering") _warned = True response = _synthesize( From 8e9e396f101e60eb9b950e5f81bbcd90cbc55c29 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 10 Oct 2024 10:53:25 +0200 Subject: [PATCH 3/9] Align left side --- doctr/utils/reconstitution.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index 3a551dd13e..8bf89e365c 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -21,8 +21,8 @@ def _synthesize( h: int, draw_proba: bool = False, font_family: Optional[str] = None, - smoothing_factor: float = 0.95, - min_font_size: int = 8, + smoothing_factor: float = 0.75, + min_font_size: int = 6, max_font_size: int = 50, ) -> Image.Image: if len(entry["geometry"]) == 2: @@ -57,9 +57,9 @@ def _synthesize( font = get_font(font_family, min_font_size) text_width, text_height = font.getbbox(word_text)[2:4] - # Calculate centering offsets - x_offset = (word_width - text_width) // 2 - y_offset = (word_height - text_height) // 2 + # Position the text left side of the bounding box + x_offset = 0 + y_offset = 0 # Create a mask for the word mask = Image.new("L", (w, h), 0) From 44f86f9da7ddcfa1da46986b1d27f80b610074c5 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 10 Oct 2024 11:23:32 +0200 Subject: [PATCH 4/9] Improve with line or words decision --- doctr/utils/reconstitution.py | 58 +++++++++++++++++------ tests/common/test_utils_reconstitution.py | 32 +++---------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index 8bf89e365c..6c0e0dd288 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -14,6 +14,13 @@ __all__ = ["synthesize_page", "synthesize_kie_page"] +def _warn_rotation(entry: Dict[str, Any], already_warned: bool) -> bool: + if len(entry["geometry"]) == 4 and not already_warned: + logging.warning("Polygons with larger rotations will lead to inaccurate rendering") + return True + return already_warned + + def _synthesize( response: Image.Image, entry: Dict[str, Any], @@ -42,7 +49,11 @@ def _synthesize( word_width = xmax - xmin word_height = ymax - ymin - word_text = entry["value"] + # If lines are provided instead of words, concatenate the word entries + if "words" in entry: + word_text = " ".join(word["value"] for word in entry["words"]) + else: + word_text = entry["value"] # Find the optimal font size try: font_size = min(word_height, max_font_size) @@ -55,7 +66,6 @@ def _synthesize( text_width, text_height = font.getbbox(word_text)[2:4] except ValueError: font = get_font(font_family, min_font_size) - text_width, text_height = font.getbbox(word_text)[2:4] # Position the text left side of the bounding box x_offset = 0 @@ -73,12 +83,17 @@ def _synthesize( d.text((xmin + x_offset, ymin + y_offset), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") if draw_proba: - p = int(255 * entry["confidence"]) + confidence = ( + entry["confidence"] + if "confidence" in entry + else sum(w["confidence"] for w in entry["words"]) / len(entry["words"]) + ) + p = int(255 * confidence) color = (255 - p, 0, p) # Red to blue gradient based on probability d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2) prob_font = get_font(font_family, 20) - prob_text = f"{entry['confidence']:.2f}" + prob_text = f"{confidence:.2f}" prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4] # Position the probability slightly above the bounding box @@ -119,16 +134,14 @@ def synthesize_page( response = Image.new("RGB", (w, h), color=(255, 255, 255)) _warned = False - for block in page["blocks"]: - for line in block["lines"]: - for word in line["words"]: - if len(word["geometry"]) == 4 and not _warned: # pragma: no cover - logging.warning("Polygons with larger rotations will lead to inaccurate rendering") - _warned = True + # If lines are provided use these to get better rendering results + if len(block["lines"]) > 1: + for line in block["lines"]: + _warned = _warn_rotation(block, _warned) response = _synthesize( response=response, - entry=word, + entry=line, w=w, h=h, draw_proba=draw_proba, @@ -137,6 +150,23 @@ def synthesize_page( min_font_size=min_font_size, max_font_size=max_font_size, ) + # Otherwise, draw each word + else: + for line in block["lines"]: + _warned = _warn_rotation(block, _warned) + for word in line["words"]: + response = _synthesize( + response=response, + entry=word, + w=w, + h=h, + draw_proba=draw_proba, + font_family=font_family, + smoothing_factor=smoothing_factor, + min_font_size=min_font_size, + max_font_size=max_font_size, + ) + return np.array(response, dtype=np.uint8) @@ -165,13 +195,11 @@ def synthesize_kie_page( response = Image.new("RGB", (w, h), color=(255, 255, 255)) _warned = False - # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: - if len(prediction["geometry"]) == 4 and not _warned: # pragma: no cover - logging.warning("Polygons with larger rotations will lead to inaccurate rendering") - _warned = True + _warned = _warn_rotation(prediction, _warned) + response = _synthesize( response=response, entry=prediction, diff --git a/tests/common/test_utils_reconstitution.py b/tests/common/test_utils_reconstitution.py index 518308051b..fdb0569b85 100644 --- a/tests/common/test_utils_reconstitution.py +++ b/tests/common/test_utils_reconstitution.py @@ -16,6 +16,13 @@ def test_synthesize_page(): assert isinstance(render_with_proba, np.ndarray) assert render_with_proba.shape == (*pages[0].dimensions, 3) + # Test with only one line + pages_one_line = pages[0].export() + pages_one_line["blocks"][0]["lines"] = [pages_one_line["blocks"][0]["lines"][0]] + render_one_line = reconstitution.synthesize_page(pages_one_line, draw_proba=True) + assert isinstance(render_one_line, np.ndarray) + assert render_one_line.shape == (*pages[0].dimensions, 3) + def test_synthesize_kie_page(): pages = _mock_kie_pages() @@ -28,28 +35,3 @@ def test_synthesize_kie_page(): render_with_proba = reconstitution.synthesize_kie_page(pages[0].export(), draw_proba=True) assert isinstance(render_with_proba, np.ndarray) assert render_with_proba.shape == (*pages[0].dimensions, 3) - - -def test_synthesize_with_edge_cases(): - page = { - "dimensions": (1000, 1000), - "blocks": [ - { - "lines": [ - { - "words": [ - {"value": "Test", "geometry": [(0, 0), (1, 0), (1, 1), (0, 1)], "confidence": 1.0}, - { - "value": "Overflow", - "geometry": [(0.9, 0.9), (1.1, 0.9), (1.1, 1.1), (0.9, 1.1)], - "confidence": 0.5, - }, - ] - } - ] - } - ], - } - render = reconstitution.synthesize_page(page, draw_proba=True) - assert isinstance(render, np.ndarray) - assert render.shape == (1000, 1000, 3) From 6a3235da0b3b111aceca3b086b229cfaec9ca137 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 10 Oct 2024 11:47:43 +0200 Subject: [PATCH 5/9] remove unused code --- doctr/utils/reconstitution.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index 6c0e0dd288..c2923d7853 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -67,10 +67,6 @@ def _synthesize( except ValueError: font = get_font(font_family, min_font_size) - # Position the text left side of the bounding box - x_offset = 0 - y_offset = 0 - # Create a mask for the word mask = Image.new("L", (w, h), 0) ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255) @@ -78,9 +74,9 @@ def _synthesize( # Draw the word text d = ImageDraw.Draw(response) try: - d.text((xmin + x_offset, ymin + y_offset), word_text, font=font, fill=(0, 0, 0), anchor="lt") + d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt") except UnicodeEncodeError: - d.text((xmin + x_offset, ymin + y_offset), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") + d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") if draw_proba: confidence = ( From 433871e8acab23decf7c805991a7e1fab72d0133 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 10 Oct 2024 12:15:58 +0200 Subject: [PATCH 6/9] don't cover the polygon check --- doctr/utils/reconstitution.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index c2923d7853..0a301b4399 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -14,7 +14,7 @@ __all__ = ["synthesize_page", "synthesize_kie_page"] -def _warn_rotation(entry: Dict[str, Any], already_warned: bool) -> bool: +def _warn_rotation(entry: Dict[str, Any], already_warned: bool) -> bool: # pragma: no cover if len(entry["geometry"]) == 4 and not already_warned: logging.warning("Polygons with larger rotations will lead to inaccurate rendering") return True @@ -134,7 +134,7 @@ def synthesize_page( # If lines are provided use these to get better rendering results if len(block["lines"]) > 1: for line in block["lines"]: - _warned = _warn_rotation(block, _warned) + _warned = _warn_rotation(block, _warned) # pragma: no cover response = _synthesize( response=response, entry=line, @@ -149,7 +149,7 @@ def synthesize_page( # Otherwise, draw each word else: for line in block["lines"]: - _warned = _warn_rotation(block, _warned) + _warned = _warn_rotation(block, _warned) # pragma: no cover for word in line["words"]: response = _synthesize( response=response, @@ -194,7 +194,7 @@ def synthesize_kie_page( # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: - _warned = _warn_rotation(prediction, _warned) + _warned = _warn_rotation(prediction, _warned) # pragma: no cover response = _synthesize( response=response, From 7f5359b591d04987697f7076c8a195f3baf84303 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2024 07:35:33 +0200 Subject: [PATCH 7/9] apply suggestions --- doctr/utils/reconstitution.py | 31 +++++++++++++---------- tests/common/test_utils_reconstitution.py | 6 +++++ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py index 0a301b4399..a229e9ddbc 100644 --- a/doctr/utils/reconstitution.py +++ b/doctr/utils/reconstitution.py @@ -14,11 +14,15 @@ __all__ = ["synthesize_page", "synthesize_kie_page"] -def _warn_rotation(entry: Dict[str, Any], already_warned: bool) -> bool: # pragma: no cover - if len(entry["geometry"]) == 4 and not already_warned: +# Global variable to avoid multiple warnings +ROTATION_WARNING = False + + +def _warn_rotation(entry: Dict[str, Any]) -> None: # pragma: no cover + global ROTATION_WARNING + if not ROTATION_WARNING and len(entry["geometry"]) == 4: logging.warning("Polygons with larger rotations will lead to inaccurate rendering") - return True - return already_warned + ROTATION_WARNING = True def _synthesize( @@ -74,9 +78,13 @@ def _synthesize( # Draw the word text d = ImageDraw.Draw(response) try: - d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt") - except UnicodeEncodeError: - d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") + try: + d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt") + except UnicodeEncodeError: + d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") + # Catch generic exceptions to avoid crashing the whole rendering + except Exception: # pragma: no cover + logging.warning(f"Could not render word: {word_text}") if draw_proba: confidence = ( @@ -129,12 +137,11 @@ def synthesize_page( h, w = page["dimensions"] response = Image.new("RGB", (w, h), color=(255, 255, 255)) - _warned = False for block in page["blocks"]: # If lines are provided use these to get better rendering results if len(block["lines"]) > 1: for line in block["lines"]: - _warned = _warn_rotation(block, _warned) # pragma: no cover + _warn_rotation(block) # pragma: no cover response = _synthesize( response=response, entry=line, @@ -149,7 +156,7 @@ def synthesize_page( # Otherwise, draw each word else: for line in block["lines"]: - _warned = _warn_rotation(block, _warned) # pragma: no cover + _warn_rotation(block) # pragma: no cover for word in line["words"]: response = _synthesize( response=response, @@ -190,12 +197,10 @@ def synthesize_kie_page( h, w = page["dimensions"] response = Image.new("RGB", (w, h), color=(255, 255, 255)) - _warned = False # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: - _warned = _warn_rotation(prediction, _warned) # pragma: no cover - + _warn_rotation(prediction) # pragma: no cover response = _synthesize( response=response, entry=prediction, diff --git a/tests/common/test_utils_reconstitution.py b/tests/common/test_utils_reconstitution.py index fdb0569b85..1af5a9c44a 100644 --- a/tests/common/test_utils_reconstitution.py +++ b/tests/common/test_utils_reconstitution.py @@ -23,6 +23,12 @@ def test_synthesize_page(): assert isinstance(render_one_line, np.ndarray) assert render_one_line.shape == (*pages[0].dimensions, 3) + # test with a smiley which can't be rendered by unidecode + pages_one_line["blocks"][0]["lines"][0]["words"][0]["text"] = "🤯" + render_one_line = reconstitution.synthesize_page(pages_one_line, draw_proba=True) + assert isinstance(render_one_line, np.ndarray) + assert render_one_line.shape == (*pages[0].dimensions, 3) + def test_synthesize_kie_page(): pages = _mock_kie_pages() From 60d0c796b141847ca14e17544dd1c2d39bf58430 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2024 07:41:08 +0200 Subject: [PATCH 8/9] apply suggestions --- tests/common/test_utils_reconstitution.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/common/test_utils_reconstitution.py b/tests/common/test_utils_reconstitution.py index 1af5a9c44a..fdb0569b85 100644 --- a/tests/common/test_utils_reconstitution.py +++ b/tests/common/test_utils_reconstitution.py @@ -23,12 +23,6 @@ def test_synthesize_page(): assert isinstance(render_one_line, np.ndarray) assert render_one_line.shape == (*pages[0].dimensions, 3) - # test with a smiley which can't be rendered by unidecode - pages_one_line["blocks"][0]["lines"][0]["words"][0]["text"] = "🤯" - render_one_line = reconstitution.synthesize_page(pages_one_line, draw_proba=True) - assert isinstance(render_one_line, np.ndarray) - assert render_one_line.shape == (*pages[0].dimensions, 3) - def test_synthesize_kie_page(): pages = _mock_kie_pages() From 6867d9865ab6f530aacf1c85d23641657e701cc3 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 11 Oct 2024 15:57:09 +0200 Subject: [PATCH 9/9] add poly test case --- tests/common/test_utils_reconstitution.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/common/test_utils_reconstitution.py b/tests/common/test_utils_reconstitution.py index fdb0569b85..be98db89b2 100644 --- a/tests/common/test_utils_reconstitution.py +++ b/tests/common/test_utils_reconstitution.py @@ -23,6 +23,13 @@ def test_synthesize_page(): assert isinstance(render_one_line, np.ndarray) assert render_one_line.shape == (*pages[0].dimensions, 3) + # Test with polygons + pages_poly = pages[0].export() + pages_poly["blocks"][0]["lines"][0]["geometry"] = [(0, 0), (0, 1), (1, 1), (1, 0)] + render_poly = reconstitution.synthesize_page(pages_poly, draw_proba=True) + assert isinstance(render_poly, np.ndarray) + assert render_poly.shape == (*pages[0].dimensions, 3) + def test_synthesize_kie_page(): pages = _mock_kie_pages()