From 00f866e622797ce84e2eff37c78dea57f32c63d0 Mon Sep 17 00:00:00 2001
From: macimovic <milosacimovic@hotmail.com>
Date: Thu, 12 Sep 2024 22:41:44 +0200
Subject: [PATCH 1/3] Modifying the ocr_predictor API to support
 assume_straight_text which reduces the relience on unreliable crop
 orientation models and reduces speed of execution

---
 doctr/models/kie_predictor/base.py       |  9 ++-
 doctr/models/kie_predictor/pytorch.py    |  7 +-
 doctr/models/kie_predictor/tensorflow.py | 12 +++-
 doctr/models/predictor/base.py           | 26 +++++--
 doctr/models/predictor/pytorch.py        |  7 +-
 doctr/models/predictor/tensorflow.py     |  2 +
 doctr/utils/geometry.py                  | 86 +++++++++++++++++++++++-
 tests/common/test_utils_geometry.py      | 34 ++++++++++
 tests/pytorch/test_models_zoo_pt.py      | 53 ++++++++++-----
 tests/tensorflow/test_models_zoo_tf.py   | 43 ++++++++----
 10 files changed, 239 insertions(+), 40 deletions(-)

diff --git a/doctr/models/kie_predictor/base.py b/doctr/models/kie_predictor/base.py
index 53d807898e..67d6afbf87 100644
--- a/doctr/models/kie_predictor/base.py
+++ b/doctr/models/kie_predictor/base.py
@@ -36,6 +36,7 @@ class _KIEPredictor(_OCRPredictor):
     def __init__(
         self,
         assume_straight_pages: bool = True,
+        assume_straight_text: bool = False,
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
@@ -43,7 +44,13 @@ def __init__(
         **kwargs: Any,
     ) -> None:
         super().__init__(
-            assume_straight_pages, straighten_pages, preserve_aspect_ratio, symmetric_pad, detect_orientation, **kwargs
+            assume_straight_pages,
+            assume_straight_text,
+            straighten_pages,
+            preserve_aspect_ratio,
+            symmetric_pad,
+            detect_orientation,
+            **kwargs,
         )
 
         self.doc_builder: KIEDocumentBuilder = KIEDocumentBuilder(**kwargs)
diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py
index 4bcedc7064..c0c50b4a2b 100644
--- a/doctr/models/kie_predictor/pytorch.py
+++ b/doctr/models/kie_predictor/pytorch.py
@@ -29,6 +29,8 @@ class KIEPredictor(nn.Module, _KIEPredictor):
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
+        assume_straight_text: if True, speeds up the inference by assuming you only pass straight text
+            without rotated textual elements.
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
             Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
@@ -44,6 +46,7 @@ def __init__(
         det_predictor: DetectionPredictor,
         reco_predictor: RecognitionPredictor,
         assume_straight_pages: bool = True,
+        assume_straight_text: bool = False,
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
@@ -57,6 +60,7 @@ def __init__(
         _KIEPredictor.__init__(
             self,
             assume_straight_pages,
+            assume_straight_text,
             straighten_pages,
             preserve_aspect_ratio,
             symmetric_pad,
@@ -129,10 +133,11 @@ def forward(
                 dict_loc_preds[class_name],
                 channels_last=channels_last,
                 assume_straight_pages=self.assume_straight_pages,
+                assume_straight_text=self.assume_straight_text,
             )
         # Rectify crop orientation
         crop_orientations: Any = {}
-        if not self.assume_straight_pages:
+        if not self.assume_straight_pages and not self.assume_straight_text:
             for class_name in dict_loc_preds.keys():
                 crops[class_name], dict_loc_preds[class_name], word_orientations = self._rectify_crops(
                     crops[class_name], dict_loc_preds[class_name]
diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py
index d9d765bbe6..7a39e11e82 100644
--- a/doctr/models/kie_predictor/tensorflow.py
+++ b/doctr/models/kie_predictor/tensorflow.py
@@ -29,6 +29,8 @@ class KIEPredictor(NestedObject, _KIEPredictor):
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
+        assume_straight_text: if True, speeds up the inference by assuming you only pass straight text
+            without rotated textual elements.
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
             Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
@@ -46,6 +48,7 @@ def __init__(
         det_predictor: DetectionPredictor,
         reco_predictor: RecognitionPredictor,
         assume_straight_pages: bool = True,
+        assume_straight_text: bool = False,
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
@@ -58,6 +61,7 @@ def __init__(
         _KIEPredictor.__init__(
             self,
             assume_straight_pages,
+            assume_straight_text,
             straighten_pages,
             preserve_aspect_ratio,
             symmetric_pad,
@@ -122,12 +126,16 @@ def __call__(
         crops = {}
         for class_name in dict_loc_preds.keys():
             crops[class_name], dict_loc_preds[class_name] = self._prepare_crops(
-                pages, dict_loc_preds[class_name], channels_last=True, assume_straight_pages=self.assume_straight_pages
+                pages,
+                dict_loc_preds[class_name],
+                channels_last=True,
+                assume_straight_pages=self.assume_straight_pages,
+                assume_straight_text=self.assume_straight_text,
             )
 
         # Rectify crop orientation
         crop_orientations: Any = {}
-        if not self.assume_straight_pages:
+        if not self.assume_straight_pages and not self.assume_straight_text:
             for class_name in dict_loc_preds.keys():
                 crops[class_name], dict_loc_preds[class_name], word_orientations = self._rectify_crops(
                     crops[class_name], dict_loc_preds[class_name]
diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py
index 0469b32ea3..a804de272e 100644
--- a/doctr/models/predictor/base.py
+++ b/doctr/models/predictor/base.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from doctr.models.builder import DocumentBuilder
-from doctr.utils.geometry import extract_crops, extract_rcrops, rotate_image
+from doctr.utils.geometry import extract_crops, extract_dewarped_crops, extract_rcrops, rotate_image
 
 from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
 from ..classification import crop_orientation_predictor, page_orientation_predictor
@@ -24,6 +24,8 @@ class _OCRPredictor:
     ----
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
+        assume_straight_text: if True, speeds up the inference by assuming you only pass straight text
+            without rotated textual elements.
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
             Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
@@ -40,6 +42,7 @@ class _OCRPredictor:
     def __init__(
         self,
         assume_straight_pages: bool = True,
+        assume_straight_text: bool = False,
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
@@ -47,8 +50,13 @@ def __init__(
         **kwargs: Any,
     ) -> None:
         self.assume_straight_pages = assume_straight_pages
+        self.assume_straight_text = assume_straight_text
         self.straighten_pages = straighten_pages
-        self.crop_orientation_predictor = None if assume_straight_pages else crop_orientation_predictor(pretrained=True)
+        self.crop_orientation_predictor = (
+            None
+            if assume_straight_pages or (not assume_straight_pages and assume_straight_text)
+            else crop_orientation_predictor(pretrained=True)
+        )
         self.page_orientation_predictor = (
             page_orientation_predictor(pretrained=True)
             if detect_orientation or straighten_pages or not assume_straight_pages
@@ -112,8 +120,15 @@ def _generate_crops(
         loc_preds: List[np.ndarray],
         channels_last: bool,
         assume_straight_pages: bool = False,
+        assume_straight_text: bool = False,
     ) -> List[List[np.ndarray]]:
-        extraction_fn = extract_crops if assume_straight_pages else extract_rcrops
+        if assume_straight_pages:
+            extraction_fn = extract_crops
+        else:
+            if assume_straight_text:
+                extraction_fn = extract_dewarped_crops
+            else:
+                extraction_fn = extract_rcrops
 
         crops = [
             extraction_fn(page, _boxes[:, :4], channels_last=channels_last)  # type: ignore[operator]
@@ -127,8 +142,11 @@ def _prepare_crops(
         loc_preds: List[np.ndarray],
         channels_last: bool,
         assume_straight_pages: bool = False,
+        assume_straight_text: bool = False,
     ) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]:
-        crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages)
+        crops = _OCRPredictor._generate_crops(
+            pages, loc_preds, channels_last, assume_straight_pages, assume_straight_text
+        )
 
         # Avoid sending zero-sized crops
         is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
index 7cbf383a06..c74ea70e41 100644
--- a/doctr/models/predictor/pytorch.py
+++ b/doctr/models/predictor/pytorch.py
@@ -29,6 +29,8 @@ class OCRPredictor(nn.Module, _OCRPredictor):
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
+        assume_straight_text: if True, speeds up the inference by assuming you only pass straight text
+            without rotated textual elements.
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
             Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
@@ -44,6 +46,7 @@ def __init__(
         det_predictor: DetectionPredictor,
         reco_predictor: RecognitionPredictor,
         assume_straight_pages: bool = True,
+        assume_straight_text: bool = False,
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
@@ -57,6 +60,7 @@ def __init__(
         _OCRPredictor.__init__(
             self,
             assume_straight_pages,
+            assume_straight_text,
             straighten_pages,
             preserve_aspect_ratio,
             symmetric_pad,
@@ -123,10 +127,11 @@ def forward(
             loc_preds,
             channels_last=channels_last,
             assume_straight_pages=self.assume_straight_pages,
+            assume_straight_text=self.assume_straight_text,
         )
         # Rectify crop orientation and get crop orientation predictions
         crop_orientations: Any = []
-        if not self.assume_straight_pages:
+        if not self.assume_straight_pages and not self.assume_straight_text:
             crops, loc_preds, _crop_orientations = self._rectify_crops(crops, loc_preds)
             crop_orientations = [
                 {"value": orientation[0], "confidence": orientation[1]} for orientation in _crop_orientations
diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
index f736614879..7aba80474f 100644
--- a/doctr/models/predictor/tensorflow.py
+++ b/doctr/models/predictor/tensorflow.py
@@ -29,6 +29,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
+        assume_straight_text: if True, speeds up the inference by assuming you only pass straight text
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
             Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
@@ -46,6 +47,7 @@ def __init__(
         det_predictor: DetectionPredictor,
         reco_predictor: RecognitionPredictor,
         assume_straight_pages: bool = True,
+        assume_straight_text: bool = False,
         straighten_pages: bool = False,
         preserve_aspect_ratio: bool = True,
         symmetric_pad: bool = True,
diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py
index aceae8ca43..d222c20f50 100644
--- a/doctr/utils/geometry.py
+++ b/doctr/utils/geometry.py
@@ -458,6 +458,8 @@ def extract_rcrops(
         _boxes[:, :, 0] *= width
         _boxes[:, :, 1] *= height
 
+    src_img = img if channels_last else img.transpose(1, 2, 0)
+
     src_pts = _boxes[:, :3].astype(np.float32)
     # Preserve size
     d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
@@ -469,7 +471,7 @@ def extract_rcrops(
     # Use a warp transformation to extract the crop
     crops = [
         cv2.warpAffine(
-            img if channels_last else img.transpose(1, 2, 0),
+            src_img,
             # Transformation matrix
             cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
             (int(d1[idx]), int(d2[idx])),
@@ -477,3 +479,85 @@ def extract_rcrops(
         for idx in range(_boxes.shape[0])
     ]
     return crops  # type: ignore[return-value]
+
+
+def extract_dewarped_crops(
+    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
+) -> List[np.ndarray]:
+    """Created cropped images from list of skewed/warped bounding boxes,
+    but containing straight text
+
+    Args:
+    ----
+        img: input image
+        polys: bounding boxes of shape (N, 4, 2)
+        dtype: target data type of bounding boxes
+        channels_last: whether the channel dimensions is the last one instead of the last one
+
+    Returns:
+    -------
+        list of cropped images
+    """
+    if polys.shape[0] == 0:
+        return []
+    if polys.shape[1:] != (4, 2):
+        raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")
+
+    # Project relative coordinates
+    _boxes = polys.copy()
+    height, width = img.shape[:2] if channels_last else img.shape[-2:]
+    if not np.issubdtype(_boxes.dtype, np.integer):
+        _boxes[:, :, 0] *= width
+        _boxes[:, :, 1] *= height
+
+    src_img = img if channels_last else img.transpose(1, 2, 0)
+
+    crops = []
+
+    for box in _boxes:
+        # Sort the points according to the x-axis
+        box_points = box[np.argsort(box[:, 0])]
+
+        # Divide the points into left and right
+        left_points = box_points[:2]
+        right_points = box_points[2:]
+
+        # Sort the left points according to the y-axis
+        left_points = left_points[np.argsort(left_points[:, 1])]
+        # Sort the right points according to the y-axis
+        right_points = right_points[np.argsort(right_points[:, 1])]
+        box_points = np.concatenate([left_points, right_points])
+
+        # Get the width and height of the rectangle that will contain the warped quadrilateral
+        # Designate the width and height based on maximum side of the quadrilateral
+        width_upper = np.linalg.norm(box_points[0] - box_points[2])
+        width_lower = np.linalg.norm(box_points[1] - box_points[3])
+        height_left = np.linalg.norm(box_points[0] - box_points[1])
+        height_right = np.linalg.norm(box_points[2] - box_points[3])
+
+        # Get the maximum width and height
+        rect_width = int(max(width_upper, width_lower))
+        rect_height = int(max(height_left, height_right))
+
+        dst_pts = np.array(
+            [
+                [0, 0],  # top-left
+                # bottom-left
+                [0, rect_height - 1],
+                # top-right
+                [rect_width - 1, 0],
+                # bottom-right
+                [rect_width - 1, rect_height - 1],
+            ],
+            dtype=dtype,
+        )
+
+        # Get the perspective transform matrix using the box points
+        affine_mat = cv2.getPerspectiveTransform(box_points.astype(np.float32), dst_pts)
+
+        # Perform the perspective warp to get the rectified crop
+        crop = cv2.warpPerspective(src_img, affine_mat, (rect_width, rect_height))
+
+        # Add the crop to the list of crops
+        crops.append(crop)
+    return crops  # type: ignore[return-value]
diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py
index 984019e06c..d1216161fc 100644
--- a/tests/common/test_utils_geometry.py
+++ b/tests/common/test_utils_geometry.py
@@ -266,3 +266,37 @@ def test_extract_rcrops(mock_pdf):
 
     # No box
     assert geometry.extract_rcrops(doc_img, np.zeros((0, 4, 2))) == []
+
+
+def test_extract_dewarped_crops(mock_pdf):
+    doc_img = DocumentFile.from_pdf(mock_pdf)[0]
+    num_crops = 2
+    rel_boxes = np.array(
+        [
+            [
+                [idx / num_crops, idx / num_crops],
+                [idx / num_crops + 0.1, idx / num_crops],
+                [idx / num_crops + 0.1, idx / num_crops + 0.1],
+                [idx / num_crops, idx / num_crops],
+            ]
+            for idx in range(num_crops)
+        ],
+        dtype=np.float32,
+    )
+    abs_boxes = deepcopy(rel_boxes)
+    abs_boxes[:, :, 0] *= doc_img.shape[1]
+    abs_boxes[:, :, 1] *= doc_img.shape[0]
+    abs_boxes = abs_boxes.astype(np.int64)
+
+    with pytest.raises(AssertionError):
+        geometry.extract_dewarped_crops(doc_img, np.zeros((1, 8)))
+    for boxes in (rel_boxes, abs_boxes):
+        croped_imgs = geometry.extract_dewarped_crops(doc_img, boxes)
+        # Number of crops
+        assert len(croped_imgs) == num_crops
+        # Data type and shape
+        assert all(isinstance(crop, np.ndarray) for crop in croped_imgs)
+        assert all(crop.ndim == 3 for crop in croped_imgs)
+
+    # No box
+    assert geometry.extract_dewarped_crops(doc_img, np.zeros((0, 4, 2))) == []
diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
index 9be66edd7b..d927957d88 100644
--- a/tests/pytorch/test_models_zoo_pt.py
+++ b/tests/pytorch/test_models_zoo_pt.py
@@ -25,14 +25,17 @@ def __call__(self, loc_preds):
 
 
 @pytest.mark.parametrize(
-    "assume_straight_pages, straighten_pages",
+    "assume_straight_pages, straighten_pages, assume_straight_text",
     [
-        [True, False],
-        [False, False],
-        [True, True],
+        [True, False, False],
+        [False, False, False],
+        [True, True, False],
+        [True, False, True],
+        [False, False, True],
+        [True, True, True],
     ],
 )
-def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages):
+def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages, assume_straight_text):
     det_bsize = 4
     det_predictor = DetectionPredictor(
         PreProcessor(output_size=(512, 512), batch_size=det_bsize),
@@ -59,6 +62,7 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         det_predictor,
         reco_predictor,
         assume_straight_pages=assume_straight_pages,
+        assume_straight_text=assume_straight_text,
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
@@ -73,7 +77,10 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         else:
             assert predictor.page_orientation_predictor is None
     else:
-        assert isinstance(predictor.crop_orientation_predictor, nn.Module)
+        if not assume_straight_text:
+            assert isinstance(predictor.crop_orientation_predictor, nn.Module)
+        else:
+            assert predictor.crop_orientation_predictor is None
         assert isinstance(predictor.page_orientation_predictor, nn.Module)
 
     out = predictor(doc)
@@ -97,8 +104,9 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
             predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
             predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
     else:
-        # Overwrite the default orientation models
-        predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
+        if not assume_straight_text:
+            # Overwrite the default orientation models
+            predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
         predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
 
     out = predictor(doc)
@@ -114,6 +122,7 @@ def test_trained_ocr_predictor(mock_payslip):
         pretrained=True,
         batch_size=2,
         assume_straight_pages=True,
+        assume_straight_text=False,
         symmetric_pad=True,
         preserve_aspect_ratio=False,
     )
@@ -144,6 +153,7 @@ def test_trained_ocr_predictor(mock_payslip):
         pretrained=True,
         batch_size=2,
         assume_straight_pages=True,
+        assume_straight_text=False,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
     )
@@ -152,6 +162,7 @@ def test_trained_ocr_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
@@ -167,14 +178,17 @@ def test_trained_ocr_predictor(mock_payslip):
 
 
 @pytest.mark.parametrize(
-    "assume_straight_pages, straighten_pages",
+    "assume_straight_pages, straighten_pages, assume_straight_text",
     [
-        [True, False],
-        [False, False],
-        [True, True],
+        [True, False, False],
+        [False, False, False],
+        [True, True, False],
+        [True, False, True],
+        [False, False, True],
+        [True, True, True],
     ],
 )
-def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages):
+def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages, assume_straight_text):
     det_bsize = 4
     det_predictor = DetectionPredictor(
         PreProcessor(output_size=(512, 512), batch_size=det_bsize),
@@ -201,6 +215,7 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         det_predictor,
         reco_predictor,
         assume_straight_pages=assume_straight_pages,
+        assume_straight_text=assume_straight_text,
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
@@ -215,7 +230,10 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         else:
             assert predictor.page_orientation_predictor is None
     else:
-        assert isinstance(predictor.crop_orientation_predictor, nn.Module)
+        if not assume_straight_text:
+            assert isinstance(predictor.crop_orientation_predictor, nn.Module)
+        else:
+            assert predictor.crop_orientation_predictor is None
         assert isinstance(predictor.page_orientation_predictor, nn.Module)
 
     out = predictor(doc)
@@ -239,8 +257,9 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
             predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
             predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
     else:
-        # Overwrite the default orientation models
-        predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
+        if not assume_straight_text:
+            # Overwrite the default orientation models
+            predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
         predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
 
     out = predictor(doc)
@@ -265,6 +284,7 @@ def test_trained_kie_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=False,
         resolve_blocks=True,
@@ -297,6 +317,7 @@ def test_trained_kie_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py
index 4b7e606563..c198f3bdb6 100644
--- a/tests/tensorflow/test_models_zoo_tf.py
+++ b/tests/tensorflow/test_models_zoo_tf.py
@@ -25,14 +25,17 @@ def __call__(self, loc_preds):
 
 
 @pytest.mark.parametrize(
-    "assume_straight_pages, straighten_pages",
+    "assume_straight_pages, straighten_pages, assume_straight_text",
     [
-        [True, False],
-        [False, False],
-        [True, True],
+        [True, False, False],
+        [False, False, False],
+        [True, True, False],
+        [True, False, True],
+        [False, False, True],
+        [True, True, True],
     ],
 )
-def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages):
+def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages, assume_straight_text):
     det_bsize = 4
     det_predictor = DetectionPredictor(
         PreProcessor(output_size=(512, 512), batch_size=det_bsize),
@@ -56,6 +59,7 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         det_predictor,
         reco_predictor,
         assume_straight_pages=assume_straight_pages,
+        assume_straight_text=assume_straight_text,
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
@@ -70,7 +74,8 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         else:
             assert predictor.page_orientation_predictor is None
     else:
-        assert isinstance(predictor.crop_orientation_predictor, NestedObject)
+        if not assume_straight_text:
+            assert isinstance(predictor.crop_orientation_predictor, NestedObject)
         assert isinstance(predictor.page_orientation_predictor, NestedObject)
 
     out = predictor(doc)
@@ -122,6 +127,7 @@ def test_trained_ocr_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=False,
         resolve_blocks=True,
@@ -153,6 +159,7 @@ def test_trained_ocr_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
@@ -166,14 +173,17 @@ def test_trained_ocr_predictor(mock_payslip):
 
 
 @pytest.mark.parametrize(
-    "assume_straight_pages, straighten_pages",
+    "assume_straight_pages, straighten_pages, assume_straight_text",
     [
-        [True, False],
-        [False, False],
-        [True, True],
+        [True, False, False],
+        [False, False, False],
+        [True, True, False],
+        [True, False, True],
+        [False, False, True],
+        [True, True, True],
     ],
 )
-def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages):
+def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages, assume_straight_text):
     det_bsize = 4
     det_predictor = DetectionPredictor(
         PreProcessor(output_size=(512, 512), batch_size=det_bsize),
@@ -197,6 +207,7 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         det_predictor,
         reco_predictor,
         assume_straight_pages=assume_straight_pages,
+        assume_straight_text=assume_straight_text,
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
@@ -211,7 +222,8 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         else:
             assert predictor.page_orientation_predictor is None
     else:
-        assert isinstance(predictor.crop_orientation_predictor, NestedObject)
+        if not assume_straight_text:
+            assert isinstance(predictor.crop_orientation_predictor, NestedObject)
         assert isinstance(predictor.page_orientation_predictor, NestedObject)
 
     out = predictor(doc)
@@ -237,8 +249,9 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
             predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
             predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
     else:
-        # Overwrite the default orientation models
-        predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
+        if not assume_straight_text:
+            # Overwrite the default orientation models
+            predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
         predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
 
     out = predictor(doc)
@@ -263,6 +276,7 @@ def test_trained_kie_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=False,
         resolve_blocks=True,
@@ -295,6 +309,7 @@ def test_trained_kie_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,

From 6ca589835695b7bed2f7c48793db6b88a3978cac Mon Sep 17 00:00:00 2001
From: macimovic <milosacimovic@hotmail.com>
Date: Thu, 12 Sep 2024 22:56:29 +0200
Subject: [PATCH 2/3] fix: a fix in a test for pytorch model zoo; wrongly set
 assume_straight_text to a detection_predictor

---
 tests/pytorch/test_models_zoo_pt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
index d927957d88..0c73d5dd2e 100644
--- a/tests/pytorch/test_models_zoo_pt.py
+++ b/tests/pytorch/test_models_zoo_pt.py
@@ -122,7 +122,6 @@ def test_trained_ocr_predictor(mock_payslip):
         pretrained=True,
         batch_size=2,
         assume_straight_pages=True,
-        assume_straight_text=False,
         symmetric_pad=True,
         preserve_aspect_ratio=False,
     )
@@ -132,6 +131,7 @@ def test_trained_ocr_predictor(mock_payslip):
         det_predictor,
         reco_predictor,
         assume_straight_pages=True,
+        assume_straight_text=False,
         straighten_pages=True,
         preserve_aspect_ratio=False,
         resolve_blocks=True,

From f1128b7f70604a95be74c91eeb5b2b520500cf3e Mon Sep 17 00:00:00 2001
From: macimovic <milosacimovic@hotmail.com>
Date: Thu, 12 Sep 2024 23:00:53 +0200
Subject: [PATCH 3/3] fix: a fix in a test for pytorch model zoo; wrongly set
 assume_straight_text to a detection_predictor

---
 tests/pytorch/test_models_zoo_pt.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
index 0c73d5dd2e..311b367ee1 100644
--- a/tests/pytorch/test_models_zoo_pt.py
+++ b/tests/pytorch/test_models_zoo_pt.py
@@ -153,7 +153,6 @@ def test_trained_ocr_predictor(mock_payslip):
         pretrained=True,
         batch_size=2,
         assume_straight_pages=True,
-        assume_straight_text=False,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
     )