Commit

rebase
felixdittrich92 committed Nov 15, 2023
1 parent 3fc93cf commit d5c23b3
Showing 9 changed files with 28 additions and 30 deletions.
10 changes: 8 additions & 2 deletions doctr/models/_utils.py
@@ -21,7 +21,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     ----
         contour: the contour from cv2.findContour

-    Returns: the maximum shape ratio
+    Returns:
+    -------
+        the maximum shape ratio
     """
     _, (w, h), _ = cv2.minAreaRect(contour)
     return max(w / h, h / w)
@@ -32,6 +34,7 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     lines of the document and the assumption that they should be horizontal.

     Args:
+    ----
         img: the img or bitmap to analyze (H, W, C)
         n_ct: the number of contours used for the orientation estimation
         ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
@@ -40,7 +43,6 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     -------
         the angle of the general document orientation
     """
-
     assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
     if np.max(img) <= 1 and np.min(img) >= 0 or (np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 1):
         thresh = img.astype(np.uint8)
@@ -119,9 +121,13 @@ def rectify_loc_preds(
 def get_language(text: str) -> Tuple[str, float]:
     """Get languages of a text using langdetect model.
     Get the language with the highest probability or no language if only a few words or a low probability

+    Args:
+    ----
         text (str): text

+    Returns:
+    -------
         The detected language in ISO 639 code and confidence score
     """
     try:
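The get_language docstring above now spells out the return value: an ISO 639 language code plus a confidence score. A minimal, hedged usage sketch (not part of this commit; the 0.5 threshold and the sample sentence are made up for illustration):

from doctr.models._utils import get_language

lang, confidence = get_language("Bonjour, ceci est un document en français.")
# get_language returns a (ISO 639 code, confidence) tuple, e.g. ("fr", 0.99);
# gate on the confidence before trusting the prediction.
if confidence > 0.5:
    print(f"Detected language: {lang} ({confidence:.2f})")
else:
    print("No reliable language detected")
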
6 changes: 3 additions & 3 deletions doctr/models/detection/predictor/pytorch.py
@@ -53,9 +53,9 @@ def forward(
             self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
         ]
         preds = [pred for batch in predicted_batches for pred in batch["preds"]]
-        seg_maps = [
-            pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
-        ]
         if return_maps:
+            seg_maps = [
+                pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
+            ]
             return preds, seg_maps
         return preds
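Both detection predictors now build seg_maps only inside the return_maps branch, so callers that never ask for the raw maps skip the permute/CPU copy entirely. A hedged sketch of the two call paths (it assumes the predictor exposes a return_maps flag, as the branch above implies; the architecture name and sample file are placeholders, not taken from this commit):

import numpy as np
from doctr.io import DocumentFile
from doctr.models import detection_predictor

det_predictor = detection_predictor("db_resnet50", pretrained=True)
pages = DocumentFile.from_images("sample_page.jpg")

# Default path: only the detection results come back, no seg_maps are built.
preds = det_predictor(pages)

# Opt-in path: the raw segmentation maps are returned as well,
# one (H, W, C) numpy array per page.
preds, seg_maps = det_predictor(pages, return_maps=True)
assert isinstance(seg_maps[0], np.ndarray)
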
2 changes: 1 addition & 1 deletion doctr/models/detection/predictor/tensorflow.py
@@ -51,7 +51,7 @@ def __call__(
         ]

         preds = [pred for batch in predicted_batches for pred in batch["preds"]]
-        seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
         if return_maps:
+            seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
             return preds, seg_maps
         return preds
11 changes: 4 additions & 7 deletions doctr/models/kie_predictor/pytorch.py
@@ -36,7 +36,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """

     def __init__(
@@ -95,10 +95,7 @@ def forward(
                 if self.detect_orientation
                 else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
-            pages = [
-                rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
-                for page, angle in zip(pages, origin_page_orientations)
-            ]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)

@@ -146,10 +143,10 @@ def forward(
             languages_dict = None

         out = self.doc_builder(
-            pages,  # type: ignore[arg-type]
+            pages,
             boxes_per_page,
             text_preds_per_page,
-            origin_page_shapes,  # type: ignore[arg-type]
+            origin_page_shapes,
             orientations,
             languages_dict,
         )
2 changes: 1 addition & 1 deletion doctr/models/kie_predictor/tensorflow.py
@@ -36,7 +36,7 @@ class KIEPredictor(NestedObject, _KIEPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """

     _children_names = ["det_predictor", "reco_predictor", "doc_builder"]
2 changes: 1 addition & 1 deletion doctr/models/predictor/base.py
@@ -29,7 +29,7 @@ class _OCRPredictor:
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
         preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
         symmetric_pad: if True and preserve_aspect_ratio is True, pas the image symmetrically.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """

     crop_orientation_predictor: Optional[CropOrientationPredictor]
11 changes: 4 additions & 7 deletions doctr/models/predictor/pytorch.py
@@ -36,7 +36,7 @@ class OCRPredictor(nn.Module, _OCRPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """

     def __init__(
@@ -90,10 +90,7 @@ def forward(
                 if self.detect_orientation
                 else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
-            pages = [
-                rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
-                for page, angle in zip(pages, origin_page_orientations)
-            ]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)

@@ -130,10 +127,10 @@ def forward(
             languages_dict = None

         out = self.doc_builder(
-            pages,  # type: ignore[arg-type]
+            pages,
             boxes,
             text_preds,
-            origin_page_shapes,  # type: ignore[arg-type]
+            origin_page_shapes,
             orientations,
             languages_dict,
         )
6 changes: 2 additions & 4 deletions doctr/models/predictor/tensorflow.py
@@ -36,7 +36,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """

     _children_names = ["det_predictor", "reco_predictor", "doc_builder"]
@@ -97,9 +97,7 @@ def __call__(
         assert all(
             len(loc_pred) == 1 for loc_pred in loc_preds_dict
         ), "Detection Model in ocr_predictor should output only one class"
-        loc_preds: List[np.ndarray] = [
-            list(loc_pred.values())[0] for loc_pred in loc_preds_dict  # type: ignore[union-attr]
-        ]
+        loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]  # type: ignore[union-attr]

         # Rectify crops if aspect ratio
         loc_preds = self._remove_padding(pages, loc_preds)
8 changes: 4 additions & 4 deletions tests/common/test_models_builder.py
@@ -29,8 +29,8 @@ def test_documentbuilder():
     out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
     assert isinstance(out, Document)
     assert len(out.pages) == num_pages
-    assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all(
-        [page.page.shape == (100, 200, 3) for page in out.pages]
+    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
+        page.page.shape == (100, 200, 3) for page in out.pages
     )
     # 1 Block & 1 line per page
     assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1
@@ -82,8 +82,8 @@ def test_kiedocumentbuilder():
     )
     assert isinstance(out, KIEDocument)
     assert len(out.pages) == num_pages
-    assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all(
-        [page.page.shape == (100, 200, 3) for page in out.pages]
+    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
+        page.page.shape == (100, 200, 3) for page in out.pages
     )
     # 1 Block & 1 line per page
     assert len(out.pages[0].predictions) == 1
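The test tweaks above drop the square brackets inside all(), switching from list comprehensions to generator expressions. A small standalone illustration of the difference (the page arrays here are dummies made up for this sketch, not the fixtures used in the tests):

import numpy as np

pages = [np.zeros((100, 200, 3), dtype=np.uint8) for _ in range(2)]

# List comprehension: the whole list of booleans is built before all() runs.
all([page.shape == (100, 200, 3) for page in pages])

# Generator expression: evaluated lazily, so all() can short-circuit on the
# first falsy value without allocating an intermediate list.
all(page.shape == (100, 200, 3) for page in pages)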
