From fa9d3e147e28c3a3f46b6f87427aec637fbff134 Mon Sep 17 00:00:00 2001
From: Rongrong
Date: Sat, 30 Dec 2023 20:34:24 +0800
Subject: [PATCH] feat(parsing): append enclosures to Telegraph post
Signed-off-by: Rongrong
---
docs/CHANGELOG.md | 1 +
docs/CHANGELOG.zh.md | 2 ++
src/parsing/medium.py | 19 +++++++++++++++++++
src/parsing/post_formatter.py | 10 ++++++++--
4 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index e61046c3cf..0b210a8faf 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -5,6 +5,7 @@
### Enhancements
- **`wsrv.nl` via relay**: Try to use `wsrv.nl` (environment variable `IMAGES_WESERV_NL`) via the media relay server (environment variable `IMG_RELAY_SERVER`). This is a workaround for images from domains/TLDs banned by `wsrv.nl` or CDNs that ban `wsrv.nl`. It can hopefully reduce the frequency of seeing "invalid media" in messages since RSStT uses `wsrv.nl` heavily to convert images into formats accepted by Telegram DCs. See also [#369](https://github.com/Rongronggg9/RSS-to-Telegram-Bot/issues/369).
+- **Append enclosures to Telegraph post**: Append enclosures (if any) to the Telegraph post. Previously, enclosures could only be sent in Telegram messages, but not in Telegraph posts.
### Bug fixes
diff --git a/docs/CHANGELOG.zh.md b/docs/CHANGELOG.zh.md
index fa5339a1d4..44d872fe53 100644
--- a/docs/CHANGELOG.zh.md
+++ b/docs/CHANGELOG.zh.md
@@ -5,6 +5,8 @@
### 增强
- **经反代的 `wsrv.nl`: 尝试通过媒体反代服务器 (环境变量 `IMG_RELAY_SERVER`) 使用 `wsrv.nl` (环境变量 `IMAGES_WESERV_NL`)。这是对那些来自被 `wsrv.nl` 封禁的域名或将 `wsrv.nl` 封禁的 CDN 的图片的变通解决方案。考虑到 RSStT 大量使用 `wsrv.nl` 将图片转换为 Telegram DC 所接受的格式,这有望减少在消息中见到 "Invalid media" 的频率。另请参阅 [#369](https://github.com/Rongronggg9/RSS-to-Telegram-Bot/issues/369)。
+- **Append enclosures to Telegraph post**: Append enclosures (if any) to the Telegraph post. Previously, enclosures could only be sent in Telegram messages, but not in Telegraph posts.
+- **将 enclosure 附加到 Telegraph 文章**: 如果有的话,将 enclosure (附件) 附加到 Telegraph 文章。先前,enclosure 只能在 Telegram 消息中发送,而无法在 Telegraph 文章中发送。
### Bug 修复
diff --git a/src/parsing/medium.py b/src/parsing/medium.py
index 9e7bdde46b..4e20666d06 100644
--- a/src/parsing/medium.py
+++ b/src/parsing/medium.py
@@ -109,6 +109,10 @@ async def fallback(self, reason: Union[Exception, str] = None) -> bool:
def type_fallback_chain(self) -> Optional[AbstractMedium]:
pass
+ @abstractmethod
+ def get_multimedia_html(self) -> Optional[str]:
+ pass
+
@abstractmethod
def get_link_html_node(self) -> Optional[Text]:
pass
@@ -274,6 +278,12 @@ def type_fallback_chain(self) -> Optional[Medium]:
else None)
) if not self.drop_silently else None
+ def get_multimedia_html(self) -> str:
+ url = self.original_urls[0]
+ if isAbsoluteHttpLink(url):
+ return f'{self.type}'
+ return f'{self.type} ({url}
)'
+
def get_link_html_node(self) -> Text:
url = self.original_urls[0]
if isAbsoluteHttpLink(url):
@@ -566,6 +576,9 @@ def __init__(self, urls: Union[str, list[str]]):
for i in range(min(len(urls_not_weserv), 3))) # use for final fallback
self.chosen_url = self.urls[0]
+ def get_multimedia_html(self) -> str:
+ return f''
+
async def change_server(self) -> bool:
if weserv_relayed := insert_image_relay_into_weserv_url(self.chosen_url):
# success if:
@@ -598,6 +611,9 @@ class Video(Medium):
typeFallbackAllowSelfUrls = False
inputMediaExternalType = InputMediaDocumentExternal
+ def get_multimedia_html(self) -> str:
+ return f''
+
class Audio(Medium):
type = AUDIO
@@ -659,6 +675,9 @@ def __init__(self, file: Union[bytes, BytesIO, Callable, Awaitable], file_name:
self.file_name = file_name
self.uploaded_file: Union[InputFile, InputFileBig, None] = None
+ def get_multimedia_html(self) -> None:
+ return None
+
def telegramize(self) -> Optional[InputMediaUploadedPhoto]:
if self.valid is None:
raise RuntimeError('validate() must be called before telegramize()')
diff --git a/src/parsing/post_formatter.py b/src/parsing/post_formatter.py
index 13b376410c..566d71276e 100644
--- a/src/parsing/post_formatter.py
+++ b/src/parsing/post_formatter.py
@@ -11,7 +11,7 @@
from .splitter import get_plain_text_length
from .html_parser import parse
from .html_node import *
-from .medium import Media, Image, Video, Audio, File, Animation, construct_weserv_url_convert_to_2560
+from .medium import Media, AbstractMedium, Image, Video, Audio, File, Animation, construct_weserv_url_convert_to_2560
AUTO: Final = 0
DISABLE: Final = -1
@@ -86,6 +86,7 @@ def __init__(self,
self.parsed: bool = False
self.html_tree: Optional[HtmlTree] = None
self.media: Optional[Media] = None
+ self.enclosure_medium_l: Optional[list[AbstractMedium]] = None
self.parsed_html: Optional[str] = None
self.plain_length: Optional[int] = None
self.telegraph_link: Optional[Union[str, False]] = None # if generating failed, will be False
@@ -515,6 +516,7 @@ async def parse_html(self):
self.html = parsed.parser.html # use a validated HTML
self.parsed = True
if self.enclosures:
+ self.enclosure_medium_l = []
for enclosure in self.enclosures:
# https://www.iana.org/assignments/media-types/media-types.xhtml
if not enclosure.url:
@@ -550,13 +552,17 @@ async def parse_html(self):
else:
medium = File(enclosure.url)
self.media.add(medium)
+ self.enclosure_medium_l.append(medium)
async def telegraph_ify(self):
if isinstance(self.telegraph_link, str) or self.telegraph_link is False:
return self.telegraph_link
+ html = self.html
+ if self.enclosure_medium_l:
+ html += f"
{'
'.join(medium.get_multimedia_html() for medium in self.enclosure_medium_l)}