Skip to content

Commit

Permalink
feat(parsing): append enclosures to Telegraph post
Browse files Browse the repository at this point in the history
Signed-off-by: Rongrong <[email protected]>
  • Loading branch information
Rongronggg9 committed Dec 30, 2023
1 parent b5da7a6 commit fa9d3e1
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Enhancements

- **`wsrv.nl` via relay**: Try to use `wsrv.nl` (environment variable `IMAGES_WESERV_NL`) via the media relay server (environment variable `IMG_RELAY_SERVER`). This is a workaround for images from domains/TLDs banned by `wsrv.nl` or CDNs that ban `wsrv.nl`. It can hopefully reduce the frequency of seeing "invalid media" in messages since RSStT uses `wsrv.nl` heavily to convert images into formats accepted by Telegram DCs. See also [#369](https://github.com/Rongronggg9/RSS-to-Telegram-Bot/issues/369).
- **Append enclosures to Telegraph post**: Append enclosures (if any) to the Telegraph post. Previously, enclosures could only be sent in Telegram messages, but not in Telegraph posts.

### Bug fixes

Expand Down
2 changes: 2 additions & 0 deletions docs/CHANGELOG.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
### 增强

- **经反代的 `wsrv.nl`**: 尝试通过媒体反代服务器 (环境变量 `IMG_RELAY_SERVER`) 使用 `wsrv.nl` (环境变量 `IMAGES_WESERV_NL`)。这是对那些来自被 `wsrv.nl` 封禁的域名或将 `wsrv.nl` 封禁的 CDN 的图片的变通解决方案。考虑到 RSStT 大量使用 `wsrv.nl` 将图片转换为 Telegram DC 所接受的格式,这有望减少在消息中见到 "Invalid media" 的频率。另请参阅 [#369](https://github.com/Rongronggg9/RSS-to-Telegram-Bot/issues/369)。
- **Append enclosures to Telegraph post**: Append enclosures (if any) to the Telegraph post. Previously, enclosures could only be sent in Telegram messages, but not in Telegraph posts.
- **将 enclosure 附加到 Telegraph 文章**: 如果有的话,将 enclosure (附件) 附加到 Telegraph 文章。先前,enclosure 只能在 Telegram 消息中发送,而无法在 Telegraph 文章中发送。

### Bug 修复

Expand Down
19 changes: 19 additions & 0 deletions src/parsing/medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ async def fallback(self, reason: Union[Exception, str] = None) -> bool:
def type_fallback_chain(self) -> Optional[AbstractMedium]:
pass

@abstractmethod
def get_multimedia_html(self) -> Optional[str]:
    """
    Return an HTML fragment that embeds or links this medium.

    The fragments of all enclosure media are joined and appended to the HTML that is
    turned into a Telegraph post.

    :return: the HTML fragment, or ``None`` when the medium cannot be represented in HTML
        (callers must be prepared to skip ``None``)
    """

@abstractmethod
def get_link_html_node(self) -> Optional[Text]:
    """
    Return a ``Text`` node representing this medium, or ``None``.

    The exact content of the node is defined by the concrete subclasses.
    """
Expand Down Expand Up @@ -274,6 +278,12 @@ def type_fallback_chain(self) -> Optional[Medium]:
else None)
) if not self.drop_silently else None

def get_multimedia_html(self) -> str:
    """
    Build the HTML fragment that represents this medium in a Telegraph post.

    The first original URL is rendered as a hyperlink labelled with the medium type when
    ``isAbsoluteHttpLink`` accepts it; otherwise the type is followed by the raw URL
    inside a ``<code>`` element.

    :return: the HTML fragment, with the URL HTML-escaped
    """
    # Function-scope import so the fix does not depend on (or shadow) module-level names.
    from html import escape as escape_html

    url = self.original_urls[0]
    # The URL may come straight from a feed enclosure, so it must be escaped before being
    # interpolated into markup; the link check itself is done on the unescaped URL.
    if isAbsoluteHttpLink(url):
        return f'<a href="{escape_html(url)}">{self.type}</a>'
    return f'{self.type} (<code>{escape_html(url, quote=False)}</code>)'

def get_link_html_node(self) -> Text:
url = self.original_urls[0]
if isAbsoluteHttpLink(url):
Expand Down Expand Up @@ -566,6 +576,9 @@ def __init__(self, urls: Union[str, list[str]]):
for i in range(min(len(urls_not_weserv), 3))) # use for final fallback
self.chosen_url = self.urls[0]

def get_multimedia_html(self) -> str:
    """
    Build an ``<img>`` element pointing at the first original URL of this image.

    :return: the HTML fragment, with the URL HTML-escaped
    """
    # Function-scope import so the fix does not depend on (or shadow) module-level names.
    from html import escape as escape_html

    # Escape the URL so that characters such as ``"`` or ``&`` cannot break out of
    # (or corrupt) the ``src`` attribute.
    return f'<img src="{escape_html(self.original_urls[0])}" />'

async def change_server(self) -> bool:
if weserv_relayed := insert_image_relay_into_weserv_url(self.chosen_url):
# success if:
Expand Down Expand Up @@ -598,6 +611,9 @@ class Video(Medium):
typeFallbackAllowSelfUrls = False
inputMediaExternalType = InputMediaDocumentExternal

def get_multimedia_html(self) -> str:
    """
    Build a ``<video>`` element pointing at the first original URL of this video.

    :return: the HTML fragment, with the URL HTML-escaped
    """
    # Function-scope import so the fix does not depend on (or shadow) module-level names.
    from html import escape as escape_html

    # ``<video>`` is not a void element: a self-closing ``<video ... />`` is parsed by HTML
    # parsers as an opening tag only, which would nest whatever follows inside the video.
    # Emit an explicit closing tag, and escape the URL before putting it into ``src``.
    return f'<video src="{escape_html(self.original_urls[0])}"></video>'


class Audio(Medium):
type = AUDIO
Expand Down Expand Up @@ -659,6 +675,9 @@ def __init__(self, file: Union[bytes, BytesIO, Callable, Awaitable], file_name:
self.file_name = file_name
self.uploaded_file: Union[InputFile, InputFileBig, None] = None

def get_multimedia_html(self) -> None:
    """
    Return the HTML fragment for this medium, which is always ``None`` for this class.

    NOTE(review): presumably because an uploaded file has no URL to embed - confirm.
    Code that joins the results of ``get_multimedia_html()`` into a string must skip
    ``None``, otherwise ``str.join`` raises ``TypeError``.
    """
    return None

def telegramize(self) -> Optional[InputMediaUploadedPhoto]:
if self.valid is None:
raise RuntimeError('validate() must be called before telegramize()')
Expand Down
10 changes: 8 additions & 2 deletions src/parsing/post_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .splitter import get_plain_text_length
from .html_parser import parse
from .html_node import *
from .medium import Media, Image, Video, Audio, File, Animation, construct_weserv_url_convert_to_2560
from .medium import Media, AbstractMedium, Image, Video, Audio, File, Animation, construct_weserv_url_convert_to_2560

AUTO: Final = 0
DISABLE: Final = -1
Expand Down Expand Up @@ -86,6 +86,7 @@ def __init__(self,
self.parsed: bool = False
self.html_tree: Optional[HtmlTree] = None
self.media: Optional[Media] = None
self.enclosure_medium_l: Optional[list[AbstractMedium]] = None
self.parsed_html: Optional[str] = None
self.plain_length: Optional[int] = None
self.telegraph_link: Optional[Union[str, False]] = None # if generating failed, will be False
Expand Down Expand Up @@ -515,6 +516,7 @@ async def parse_html(self):
self.html = parsed.parser.html # use a validated HTML
self.parsed = True
if self.enclosures:
self.enclosure_medium_l = []
for enclosure in self.enclosures:
# https://www.iana.org/assignments/media-types/media-types.xhtml
if not enclosure.url:
Expand Down Expand Up @@ -550,13 +552,17 @@ async def parse_html(self):
else:
medium = File(enclosure.url)
self.media.add(medium)
self.enclosure_medium_l.append(medium)

async def telegraph_ify(self):
if isinstance(self.telegraph_link, str) or self.telegraph_link is False:
return self.telegraph_link

html = self.html
if self.enclosure_medium_l:
html += f"<p>{'<br>'.join(medium.get_multimedia_html() for medium in self.enclosure_medium_l)}</p>"
try:
self.telegraph_link = await tgraph.TelegraphIfy(self.html,
self.telegraph_link = await tgraph.TelegraphIfy(html,
title=self.title,
link=self.link,
feed_title=self.feed_title,
Expand Down

0 comments on commit fa9d3e1

Please sign in to comment.