Skip to content

Commit

Permalink
Fix nested parsing of reference text (#152)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisjsewell authored Jun 12, 2020
1 parent eed7be4 commit 01e12c2
Show file tree
Hide file tree
Showing 24 changed files with 698 additions and 44 deletions.
14 changes: 11 additions & 3 deletions docs/using/syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -757,14 +757,22 @@ Alternatively using the markdown syntax:
[my text](header_target)
```
is synonymous with using the [any inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-any):
is equivalent to using the [any inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-any):
```md
{any}`my text <header_target>`
```
Using the same example, see this ref: [](syntax/targets), and here's a ref back to the top of
this page: [my text](example_syntax).
but can also accept "nested" syntax (like bold text) and will recognise document paths that include extensions (e.g. `using/syntax` or `using/syntax.md`).
Using the same example, see this ref: [](syntax/targets), here is a reference back to the top of
this page: [my text with **nested** $\alpha$ syntax](example_syntax), and here is a reference to another page (`[](intro.md)`): [](intro.md).
```{note}
If you wish to have the target's title inserted into your text, you can
leave the "text" section of the markdown link empty. For example, this
markdown: `[](syntax.md)` will result in: [](syntax.md).
```
(syntax/footnotes)=
Expand Down
2 changes: 2 additions & 0 deletions myst_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
def setup(app):
    """Initialize Sphinx extension.

    Registers ``.md`` as a ``markdown`` source suffix, the MyST parser, the
    ``myst_config`` configuration value and the MyST reference resolver
    post-transform on ``app``.

    :param app: the Sphinx application the extension is being loaded into
    :return: the extension metadata (its version, and that it is safe for
        parallel reading)
    """
    # these imports are deliberately performed inside the function body
    from myst_parser.sphinx_parser import MystParser
    from myst_parser.myst_refs import MystReferenceResolver

    # source files: map the ".md" suffix to the parser that handles it
    app.add_source_suffix(".md", "markdown")
    app.add_source_parser(MystParser)

    # user-facing configuration; changing it triggers a rebuild of the "env"
    app.add_config_value("myst_config", {}, "env")

    # resolution of ``[text](target)`` style references
    app.add_post_transform(MystReferenceResolver)

    extension_metadata = {"version": __version__, "parallel_read_safe": True}
    return extension_metadata
199 changes: 199 additions & 0 deletions myst_parser/myst_refs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
"""A post-transform for overriding the behaviour of sphinx reference resolution.
This is applied to MyST type references only, such as ``[text](target)``,
and allows for nested syntax.
"""
import os
from typing import Any, List, Optional, Tuple
from typing import cast

from docutils import nodes
from docutils.nodes import document, Element

from sphinx import addnodes
from sphinx.addnodes import pending_xref
from sphinx.errors import NoUri
from sphinx.locale import __
from sphinx.transforms.post_transforms import ReferencesResolver
from sphinx.util import docname_join, logging
from sphinx.util.nodes import clean_astext, make_refnode

logger = logging.getLogger(__name__)


class MystReferenceResolver(ReferencesResolver):
    """Resolves cross-references on doctrees.

    Overrides default sphinx implementation, to allow for nested syntax.
    Only ``pending_xref`` nodes whose ``reftype`` is ``"myst"`` are handled;
    all others are left untouched.
    """

    default_priority = 9  # higher priority than ReferencesResolver (10)

    def run(self, **kwargs: Any) -> None:
        """Replace each ``myst`` pending reference with its resolved node.

        For every matching node, resolution is attempted in this order:

        1. :meth:`resolve_myst_ref`,
        2. the first result of the ``missing-reference`` event,
        3. otherwise a missing-reference warning is requested and the
           (copied) content of the reference is used as plain fallback.

        If ``NoUri`` is raised during resolution, the content is likewise
        used without a link.
        """
        self.document: document
        for node in self.document.traverse(addnodes.pending_xref):
            if node["reftype"] != "myst":
                continue

            # copy of the inner content, used as the fallback replacement
            contnode = cast(nodes.TextElement, node[0].deepcopy())
            newnode = None

            typ = node["reftype"]
            target = node["reftarget"]
            refdoc = node.get("refdoc", self.env.docname)
            domain = None

            try:
                newnode = self.resolve_myst_ref(refdoc, node, contnode)
                # no new node found? try the missing-reference event
                if newnode is None:
                    newnode = self.app.emit_firstresult(
                        "missing-reference", self.env, node, contnode
                    )
                # still not found? warn if node wishes to be warned about or
                # we are in nit-picky mode
                if newnode is None:
                    self.warn_missing_reference(refdoc, typ, target, node, domain)
            except NoUri:
                newnode = contnode

            node.replace_self(newnode or contnode)

    def _resolve_ref_nested(
        self, node: pending_xref, fromdocname: str
    ) -> Optional[Element]:
        """Resolve ``node`` against the labels of the ``std`` domain.

        This is the same as ``sphinx.domains.std._resolve_ref_xref``,
        but allows for nested syntax,
        rather than converting the inner nodes to raw text.

        :param node: the pending reference; its target is lower-cased
            before the lookup
        :param fromdocname: name of the document containing the reference
        :return: the reference node, or ``None`` if the label is unknown
        """
        stddomain = self.env.get_domain("std")
        target = node["reftarget"].lower()

        if node["refexplicit"]:
            # reference to anonymous label; the reference uses
            # the supplied link caption
            docname, labelid = stddomain.anonlabels.get(target, ("", ""))
            sectname = node.astext()
            innernode = nodes.inline(sectname, "")
            # keep the already rendered (possibly nested) children
            innernode.extend(node[0].children)
        else:
            # reference to named label; the final node will
            # contain the section name after the label
            docname, labelid, sectname = stddomain.labels.get(target, ("", "", ""))
            innernode = nodes.inline(sectname, sectname)

        if not docname:
            return None

        return make_refnode(self.app.builder, fromdocname, docname, labelid, innernode)

    def _resolve_doc_nested(
        self, node: pending_xref, fromdocname: str
    ) -> Optional[Element]:
        """Resolve ``node`` as a reference to a document.

        This is the same as ``sphinx.domains.std._resolve_doc_xref``,
        but allows for nested syntax,
        rather than converting the inner nodes to raw text.
        It also allows for extensions on document names.

        :param node: the pending reference
        :param fromdocname: name of the document containing the reference
        :return: the reference node, or ``None`` if no such document is known
        """
        # directly reference to document by source name; can be absolute or relative
        refdoc = node.get("refdoc", fromdocname)
        docname = docname_join(refdoc, node["reftarget"])

        if docname not in self.env.all_docs:
            # try stripping known extensions from doc name
            if os.path.splitext(docname)[1] in self.env.config.source_suffix:
                docname = os.path.splitext(docname)[0]
            if docname not in self.env.all_docs:
                return None

        if node["refexplicit"]:
            # reference with explicit title
            caption = node.astext()
            innernode = nodes.inline(caption, "", classes=["doc"])
            # keep the already rendered (possibly nested) children
            innernode.extend(node[0].children)
        else:
            # TODO do we want nested syntax for titles?
            caption = clean_astext(self.env.titles[docname])
            innernode = nodes.inline(caption, caption, classes=["doc"])

        return make_refnode(self.app.builder, fromdocname, docname, None, innernode)

    def resolve_myst_ref(
        self, refdoc: str, node: pending_xref, contnode: Element
    ) -> Optional[Element]:
        """Resolve reference generated by the "myst" role.

        Candidates are collected, in order, from: ``std`` labels, document
        names, any other ``std`` object type, and finally every other domain.
        The first candidate is used; if there are several, a warning listing
        all of them is logged.

        :param refdoc: name of the document containing the reference
        :param node: the pending reference
        :param contnode: copy of the reference content, used as the link text
            for candidates that do not support nested syntax
        :return: the resolved node, or ``None`` if there is no candidate
        """

        stddomain = self.env.get_domain("std")
        target = node["reftarget"]
        results = []  # type: List[Tuple[str, Element]]

        # resolve standard references first
        res = self._resolve_ref_nested(node, refdoc)
        if res:
            results.append(("std:ref", res))

        # next resolve doc names
        res = self._resolve_doc_nested(node, refdoc)
        if res:
            results.append(("std:doc", res))

        # next resolve for any other standard reference object
        for objtype in stddomain.object_types:
            key = (objtype, target)
            if objtype == "term":
                key = (objtype, target.lower())
            if key in stddomain.objects:
                docname, labelid = stddomain.objects[key]
                domain_role = "std:" + stddomain.role_for_objtype(objtype)
                ref_node = make_refnode(
                    self.app.builder, refdoc, docname, labelid, contnode
                )
                results.append((domain_role, ref_node))

        # finally resolve for any other type of reference
        # TODO do we want to restrict this?
        for domain in self.env.domains.values():
            if domain.name == "std":
                continue  # we did this one already
            try:
                results.extend(
                    domain.resolve_any_xref(
                        self.env, refdoc, self.app.builder, target, node, contnode
                    )
                )
            except NotImplementedError:
                # the domain doesn't yet support the new interface
                # we have to manually collect possible references (SLOW)
                for role in domain.roles:
                    res = domain.resolve_xref(
                        self.env, refdoc, self.app.builder, role, target, node, contnode
                    )
                    if res and isinstance(res[0], nodes.Element):
                        results.append((f"{domain.name}:{role}", res))

        # now, see how many matches we got...
        if not results:
            return None
        if len(results) > 1:

            def stringify(name: str, refnode: Element) -> str:
                reftitle = refnode.get("reftitle", refnode.astext())
                return f":{name}:`{reftitle}`"

            candidates = " or ".join(
                stringify(name, resolved) for name, resolved in results
            )
            # The template passed to ``__`` must be a constant string:
            # interpolating before the lookup would yield a different message
            # id for every target, which can never match a translation.
            logger.warning(
                __(
                    "more than one target found for 'myst' cross-reference %s: "
                    "could be %s"
                ),
                target,
                candidates,
                location=node,
            )

        res_role, newnode = results[0]
        # Override "myst" class with the actual role type to get the styling
        # approximately correct.
        res_domain = res_role.split(":")[0]
        if len(newnode) > 0 and isinstance(newnode[0], nodes.Element):
            newnode[0]["classes"] = newnode[0].get("classes", []) + [
                res_domain,
                res_role.replace(":", "-"),
            ]

        return newnode
26 changes: 26 additions & 0 deletions myst_parser/sphinx_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from os import path
import time

from docutils import frontend, nodes
from docutils.core import publish_doctree
from sphinx.application import Sphinx
from sphinx.io import SphinxStandaloneReader
from sphinx.parsers import Parser
from sphinx.util import logging
from sphinx.util.docutils import sphinx_domains

from myst_parser.main import to_docutils

Expand Down Expand Up @@ -200,3 +207,22 @@ def parse(self, inputstring: str, document: nodes.document):
disable_syntax=self.config["disable_syntax"] or [],
math_delimiters=self.config["math_delimiters"],
)


def parse(app: Sphinx, text: str, docname: str = "index") -> nodes.document:
    """Parse a string as MystMarkdown with Sphinx application.

    :param app: the Sphinx application supplying the environment and srcdir
    :param text: the Markdown source to parse
    :param docname: the document name to register the text under
    :return: the doctree produced by ``publish_doctree``
    """
    env = app.env
    # register the document with the environment before parsing it
    env.temp_data["docname"] = docname
    env.all_docs[docname] = time.time()

    source_reader = SphinxStandaloneReader()
    source_reader.setup(app)

    md_parser = MystParser()
    md_parser.set_application(app)

    source_path = path.join(app.srcdir, docname + ".md")
    overrides = {"env": env, "gettext_compact": True}

    with sphinx_domains(env):
        return publish_doctree(
            text,
            source_path,
            reader=source_reader,
            parser=md_parser,
            parser_name="markdown",
            settings_overrides=overrides,
        )
19 changes: 11 additions & 8 deletions myst_parser/sphinx_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def handle_cross_reference(self, token, destination):
"""Create nodes for references that are not immediately resolvable."""
wrap_node = addnodes.pending_xref(
reftarget=unquote(destination),
reftype="any",
reftype="myst",
refdomain=None, # Added to enable cross-linking
refexplicit=len(token.children) > 0,
refwarn=True,
Expand All @@ -41,9 +41,10 @@ def handle_cross_reference(self, token, destination):
if title:
wrap_node["title"] = title
self.current_node.append(wrap_node)
text_node = nodes.literal("", "", classes=["xref", "any"])
wrap_node.append(text_node)
with self.current_node_context(text_node):

inner_node = nodes.inline("", "", classes=["xref", "myst"])
wrap_node.append(inner_node)
with self.current_node_context(inner_node):
self.render_children(token)

def render_math_block_eqno(self, token):
Expand Down Expand Up @@ -76,7 +77,7 @@ def add_math_target(self, node):
return target


def minimal_sphinx_app(configuration=None, sourcedir=None):
def minimal_sphinx_app(configuration=None, sourcedir=None, with_builder=False):
"""Create a minimal Sphinx environment; loading sphinx roles, directives, etc.
"""

Expand Down Expand Up @@ -106,7 +107,7 @@ def __init__(self, confoverrides=None, srcdir=None):
self.env.temp_data["docname"] = "mock_docname"
self.builder = None

if not confoverrides:
if not with_builder:
return

# this code is only required for more complex parsing with extensions
Expand All @@ -128,7 +129,7 @@ def __init__(self, confoverrides=None, srcdir=None):


@contextmanager
def mock_sphinx_env(conf=None, srcdir=None, document=None):
def mock_sphinx_env(conf=None, srcdir=None, document=None, with_builder=False):
"""Set up an environment, to parse sphinx roles/directives,
outside of a `sphinx-build`.
Expand All @@ -144,7 +145,9 @@ def mock_sphinx_env(conf=None, srcdir=None, document=None):
_roles = copy.copy(roles._roles)
# Monkey-patch directive and role dispatch,
# so that sphinx domain-specific markup takes precedence.
app = minimal_sphinx_app(configuration=conf, sourcedir=srcdir)
app = minimal_sphinx_app(
configuration=conf, sourcedir=srcdir, with_builder=with_builder
)
_sphinx_domains = sphinx_domains(app.env)
_sphinx_domains.enable()
if document is not None:
Expand Down
22 changes: 11 additions & 11 deletions tests/test_renderers/fixtures/syntax_elements.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ bar$ b
.
<document source="notset">
<paragraph>
a
a
<math>
foo
bar
Expand Down Expand Up @@ -390,12 +390,12 @@ Title
<title>
Title
<paragraph>
<pending_xref refdomain="True" refexplicit="True" reftarget="target" reftype="any" refwarn="True">
<literal classes="xref any">
<pending_xref refdomain="True" refexplicit="True" reftarget="target" reftype="myst" refwarn="True">
<inline classes="xref myst">
alt1
<paragraph>
<pending_xref refdomain="True" refexplicit="False" reftarget="target2" reftype="any" refwarn="True">
<literal classes="xref any">
<pending_xref refdomain="True" refexplicit="False" reftarget="target2" reftype="myst" refwarn="True">
<inline classes="xref myst">
<paragraph>
<reference refuri="https://www.google.com">
alt2
Expand Down Expand Up @@ -489,8 +489,8 @@ Link Definition in directive:
<document source="notset">
<note>
<paragraph>
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="any" refwarn="True">
<literal classes="xref any">
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="myst" refwarn="True">
<inline classes="xref myst">
a
.
Expand All @@ -514,8 +514,8 @@ Link Definition in nested directives:
<note>
<note>
<paragraph>
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="any" refwarn="True">
<literal classes="xref any">
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="myst" refwarn="True">
<inline classes="xref myst">
ref1
[ref2]
Expand Down Expand Up @@ -699,6 +699,6 @@ a = 1
<literal_block language="::python" xml:space="preserve">
a = 1
<paragraph>
<pending_xref refdomain="True" refexplicit="False" reftarget="target" reftype="any" refwarn="True">
<literal classes="xref any">
<pending_xref refdomain="True" refexplicit="False" reftarget="target" reftype="myst" refwarn="True">
<inline classes="xref myst">
.
Loading

0 comments on commit 01e12c2

Please sign in to comment.