From 8b2753ae284274abea0464c9ddec66318d78d2fc Mon Sep 17 00:00:00 2001 From: Rongrong Date: Sun, 5 Nov 2023 18:33:04 +0800 Subject: [PATCH] feat(parser): trim linebreaks between blockquote&p Signed-off-by: Rongrong --- src/parsing/html_parser.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/parsing/html_parser.py b/src/parsing/html_parser.py index 2dd5fce954..ac7814d342 100644 --- a/src/parsing/html_parser.py +++ b/src/parsing/html_parser.py @@ -127,10 +127,13 @@ async def _parse_item(self, soup: Union[PageElement, BeautifulSoup, Tag, Navigab if text: if parent == 'li': return text - text_l = [Br(), text] - if not(isinstance(soup.next_sibling, Tag) and soup.next_sibling.name == 'blockquote'): + text_l = [text] + ps, ns = soup.previous_sibling, soup.next_sibling + if not (isinstance(ps, Tag) and ps.name == 'blockquote'): + text_l.insert(0, Br()) + if not (isinstance(ns, Tag) and ns.name == 'blockquote'): text_l.append(Br()) - return Text(text_l) + return Text(text_l) if len(text_l) > 1 else text return None if tag == 'blockquote':