Skip to content

Commit

Permalink
code refactoring
Browse files: browse the repository at this point in the history
  • Loading branch information
Alexander Golodkov committed Sep 12, 2024
1 parent 7dd3e6e commit 2cc131f
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
4 changes: 2 additions & 2 deletions dedoc/data_structures/line_with_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ def to_api_schema(self) -> ApiLineWithMeta:
def shift(self, shift_x: int, shift_y: int, image_width: int, image_height: int) -> None:
import json
from dedoc.data_structures.concrete_annotations.bbox_annotation import BBoxAnnotation
- for i_ann, annotation in enumerate(self.annotations):
+ for annotation in self.annotations:
if annotation.name == "bounding box":
bbox, page_width, page_height = BBoxAnnotation.get_bbox_from_value(annotation.value)
bbox.shift(shift_x, shift_y)
- self.annotations[i_ann].value = json.dumps(bbox.to_relative_dict(image_width, image_height))
+ annotation.value = json.dumps(bbox.to_relative_dict(image_width, image_height))
6 changes: 3 additions & 3 deletions dedoc/readers/pdf_reader/data_classes/tables/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def copy_from(cell: "Cell",
contour_coord=cell.con_coord)

def shift(self, shift_x: int, shift_y: int, image_width: int, image_height: int) -> None:
- if self.lines and len(self.lines) >= 1:
- for i_lin, _line in enumerate(self.lines):
- self.lines[i_lin].shift(shift_x=shift_x, shift_y=shift_y, image_width=image_width, image_height=image_height)
+ if self.lines:
+ for line in self.lines:
+ line.shift(shift_x=shift_x, shift_y=shift_y, image_width=image_width, image_height=image_height)
self.x_top_left += shift_x
self.x_bottom_right += shift_x
self.y_top_left += shift_y
Expand Down
24 changes: 12 additions & 12 deletions dedoc/readers/pdf_reader/pdf_base_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,36 +161,36 @@ def _shift_all_contents(self, lines: List[LineWithMeta], mp_tables: List[ScanTab
gost_analyzed_images: List[Tuple[np.ndarray, BBox]]) -> None:
# shift mp_tables
for scan_table in mp_tables:
- for i_loc, location in enumerate(scan_table.locations):
+ for location in scan_table.locations:
table_page_number = location.page_number
- scan_table.locations[i_loc].shift(shift_x=gost_analyzed_images[table_page_number][1].x_top_left,
- shift_y=gost_analyzed_images[table_page_number][1].y_top_left)
+ location.shift(shift_x=gost_analyzed_images[table_page_number][1].x_top_left,
+ shift_y=gost_analyzed_images[table_page_number][1].y_top_left)
for row in scan_table.matrix_cells:
row_page_number = scan_table.page_number
for cell in row: # check page number information in the current table row, because table can be located on multiple pages
if cell.lines and len(cell.lines) >= 1:
row_page_number = cell.lines[0].metadata.page_id
break
- for i_cel, cell in enumerate(row): # if cell doesn't contain page number information we use row_page_number
+ for cell in row: # if cell doesn't contain page number information we use row_page_number
page_number = cell.lines[0].metadata.page_id if cell.lines and len(cell.lines) >= 1 else row_page_number
image_width, image_height = gost_analyzed_images[page_number][0].shape[1], gost_analyzed_images[page_number][0].shape[0]
shift_x, shift_y = gost_analyzed_images[page_number][1].x_top_left, gost_analyzed_images[page_number][1].y_top_left
- row[i_cel].shift(shift_x=shift_x, shift_y=shift_y, image_width=image_width, image_height=image_height)
+ cell.shift(shift_x=shift_x, shift_y=shift_y, image_width=image_width, image_height=image_height)

# shift attachments
- for i_att, attachment in enumerate(attachments):
+ for attachment in attachments:
attachment_page_number = attachment.location.page_number
shift_x, shift_y = gost_analyzed_images[attachment_page_number][1].x_top_left, gost_analyzed_images[attachment_page_number][1].y_top_left
- attachments[i_att].location.shift(shift_x, shift_y)
+ attachment.location.shift(shift_x, shift_y)

# shift lines
- for i_lin, line in enumerate(lines):
+ for line in lines:
page_number = line.metadata.page_id
image_width, image_height = gost_analyzed_images[page_number][0].shape[1], gost_analyzed_images[page_number][0].shape[0]
- lines[i_lin].shift(shift_x=gost_analyzed_images[page_number][1].x_top_left,
- shift_y=gost_analyzed_images[page_number][1].y_top_left,
- image_width=image_width,
- image_height=image_height)
+ line.shift(shift_x=gost_analyzed_images[page_number][1].x_top_left,
+ shift_y=gost_analyzed_images[page_number][1].y_top_left,
+ image_width=image_width,
+ image_height=image_height)

@abstractmethod
def _process_one_page(self, image: ndarray, parameters: ParametersForParseDoc, page_number: int, path: str) \
Expand Down

0 comments on commit 2cc131f

Please sign in to comment.