From 9905f81b48488e9c7e13af7deaaee412baeeed6d Mon Sep 17 00:00:00 2001 From: dronperminov Date: Tue, 11 Jun 2024 16:10:00 +0300 Subject: [PATCH] TLDR-704 Add line for divide pages --- dedoc/api/api_utils.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dedoc/api/api_utils.py b/dedoc/api/api_utils.py index c942fefa..ad91f2d8 100644 --- a/dedoc/api/api_utils.py +++ b/dedoc/api/api_utils.py @@ -120,12 +120,20 @@ def json2html(text: str, attachments: Optional[List[ParsedDocument]], tabs: int = 0, table2id: Dict[str, int] = None, - attach2id: Dict[str, int] = None) -> str: + attach2id: Dict[str, int] = None, + prev_page_id: Optional[List[int]] = None) -> str: + if prev_page_id is None: + prev_page_id = [0] + tables = [] if tables is None else tables attachments = [] if attachments is None else attachments table2id = {table.metadata.uid: table_id for table_id, table in enumerate(tables)} if table2id is None else table2id attach2id = {attachment.metadata.uid: attachment_id for attachment_id, attachment in enumerate(attachments)} if attach2id is None else attach2id + if paragraph.metadata.page_id != prev_page_id[0]: + text += f"
Page {prev_page_id[0] + 1}

" + prev_page_id[0] = paragraph.metadata.page_id + ptext = __annotations2html(paragraph=paragraph, table2id=table2id, attach2id=attach2id, tabs=tabs) if paragraph.metadata.hierarchy_level.line_type in [HierarchyLevel.header, HierarchyLevel.root]: @@ -141,7 +149,8 @@ def json2html(text: str, text += ptext for subparagraph in paragraph.subparagraphs: - text = json2html(text=text, paragraph=subparagraph, tables=None, attachments=None, tabs=tabs + 4, table2id=table2id, attach2id=attach2id) + text = json2html(text=text, paragraph=subparagraph, tables=None, attachments=None, tabs=tabs + 4, table2id=table2id, attach2id=attach2id, + prev_page_id=prev_page_id) if tables is not None and len(tables) > 0: text += "

Tables:

"