From 99826627b20483ea0db6130314e10b7b36eb88b5 Mon Sep 17 00:00:00 2001 From: Angelo Gladding Date: Sun, 3 Dec 2023 18:02:26 -0800 Subject: [PATCH] Parse `metaformats` by default and place in `meta-items` property --- mf2py/parser.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/mf2py/parser.py b/mf2py/parser.py index 0c1bee3..69bc6fd 100644 --- a/mf2py/parser.py +++ b/mf2py/parser.py @@ -19,7 +19,7 @@ from .version import __version__ -def parse(doc=None, url=None, html_parser=None, expose_dom=False, metaformats=False): +def parse(doc=None, url=None, html_parser=None, expose_dom=False): """ Parse a microformats2 document or url and return a json dictionary. @@ -33,8 +33,6 @@ def parse(doc=None, url=None, html_parser=None, expose_dom=False, metaformats=Fa options from the BeautifulSoup documentation are: "html", "xml", "html5", "lxml", "html5lib", and "html.parser" expose_dom (boolean): optional, expose the DOM of embedded properties. - metaformats (boolean): whether to include metaformats extracted from OGP - and Twitter card data: https://microformats.org/wiki/metaformats Return: a json dict represented the structured data in this document. """ @@ -43,7 +41,6 @@ def parse(doc=None, url=None, html_parser=None, expose_dom=False, metaformats=Fa url, html_parser, expose_dom=expose_dom, - metaformats=metaformats, ).to_dict() @@ -62,8 +59,6 @@ class Parser(object): "html", "xml", "html5", "lxml", "html5lib", and "html.parser" defaults to "html5lib" expose_dom (boolean): optional, expose the DOM of embedded properties. - metaformats (boolean): whether to include metaformats extracted from OGP - and Twitter card data: https://microformats.org/wiki/metaformats Attributes: useragent (string): the User-Agent string for the Parser @@ -79,7 +74,6 @@ def __init__( url=None, html_parser=None, expose_dom=False, - metaformats=False, ): self.__url__ = None self.__doc__ = None @@ -94,7 +88,6 @@ def __init__( "version": __version__, }, } - self.__metaformats = metaformats self.expose_dom = expose_dom self.lang = None @@ -513,15 +506,12 @@ def parse_el(el, ctx): ctx = [] - if self.__metaformats: - # extract out a metaformats item, if available - self.__metaformats_item = metaformats.parse(self.__doc__, url=self.__url__) + if metaformats_item := metaformats.parse(self.__doc__, url=self.__url__): + self.__parsed__["meta-item"] = metaformats_item # start parsing at root element of the document parse_el(self.__doc__, ctx) self.__parsed__["items"] = ctx - if self.__metaformats and self.__metaformats_item: - self.__parsed__["items"].append(self.__metaformats_item) # parse for rel values for el in get_descendents(self.__doc__):