From 756bbd4895746f31897480c391a4a4e536d59136 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar (UMass)"
Date: Thu, 17 Oct 2024 17:45:29 -0700
Subject: [PATCH] add subscript extension

---
Review notes (free text between the "---" line and the first "diff --git"
line; it is not part of any hunk).

What the patch does:
 * Adds a "subscript" inline syntax extension (src/extensions/subscript.c/.h)
   and registers it in core_extensions_registration() between the table and
   strikethrough extensions.
 * match(): only reacts to '~'. It scans the delimiter run (at most 100
   characters, the size of the local buffer minus the terminator), always
   returns a TEXT node holding the run, and pushes a delimiter only when the
   run is exactly one '~' long and is left- or right-flanking.
 * insert(): when opener and closer literals have equal length and the opener
   text node can be retyped, the opener becomes a CMARK_NODE_SUBSCRIPT node,
   every sibling up to (not including) the closer text is re-parented under
   it, and the closer text node is freed. In all cases the delimiters from
   the closer back to the opener are removed and closer->next is returned.
 * Renderers: HTML emits <sub>...</sub>; LaTeX emits \textsubscript{...};
   man emits \d\s-2 ... \s+2\u on its own line; CommonMark and plaintext
   emit a single "~" on both enter and exit.
 * The strikethrough plaintext renderer now emits "~~" instead of "~".
 * Table cells accept CMARK_NODE_SUBSCRIPT children.

About this copy:
 * It was recovered from a text extraction that collapsed newlines and
   dropped angle-bracketed tokens. Restored tokens: <parser.h>, <render.h>,
   "<sub>", "</sub>", "<unknown>", and the <p>/<sub> markup in the R test.
   Leading whitespace (including Makefile tabs) is reconstructed by
   convention; verify against the commit before applying.
 * The author e-mail on the From: line was lost and is NOT restored.
 * One deliberate change relative to the commit: the comment in
   latex_render() said "requires \usepackage{ulem}" (copied from the
   strikethrough extension). \textsubscript does not come from ulem, so the
   comment was corrected in place; line counts are unchanged, but the blob
   hash on the subscript.c "index" line no longer matches.

 NEWS                             |   1 +
 src/Makevars                     |   2 +-
 src/extensions/core-extensions.c |   2 +
 src/extensions/strikethrough.c   |   2 +-
 src/extensions/subscript.c       | 166 +++++++++++++++++++++++++++++++
 src/extensions/subscript.h       |   9 ++
 src/extensions/table.c           |   2 +
 tests/testthat/test-extensions.R |  22 +++-
 8 files changed, 203 insertions(+), 3 deletions(-)
 create mode 100644 src/extensions/subscript.c
 create mode 100644 src/extensions/subscript.h

diff --git a/NEWS b/NEWS
index 42c3e65..4bb609c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
 1.9.4
   - Apply upstream PR https://github.com/github/cmark-gfm/pull/362
   - Require double-tilde `~~` for the strikethrough extension, for consistency with Pandoc's Markdown
+  - Implement subscript extension for single tilde `~`
 
 1.9.1
   - Update libcmark-gfm to 0.29.0.gfm.13
diff --git a/src/Makevars b/src/Makevars
index 23ba950..a4c35bc 100644
--- a/src/Makevars
+++ b/src/Makevars
@@ -10,7 +10,7 @@ LIBCMARK = cmark/cmark.o cmark/node.o cmark/iterator.o cmark/blocks.o cmark/inli
 	cmark/linked_list.o cmark/plugin.o cmark/registry.o cmark/syntax_extension.o \
 	cmark/plaintext.o cmark/footnotes.o cmark/map.o \
 	extensions/autolink.o extensions/core-extensions.o extensions/ext_scanners.o \
-	extensions/strikethrough.o extensions/table.o extensions/tagfilter.o extensions/tasklist.o
+	extensions/subscript.o extensions/strikethrough.o extensions/table.o extensions/tagfilter.o extensions/tasklist.o
 
 PKG_LIBS = -Lcmark -lstatcmark
 STATLIB = cmark/libstatcmark.a
diff --git a/src/extensions/core-extensions.c b/src/extensions/core-extensions.c
index 846e2bc..e685c72 100644
--- a/src/extensions/core-extensions.c
+++ b/src/extensions/core-extensions.c
@@ -1,5 +1,6 @@
 #include "cmark-gfm-core-extensions.h"
 #include "autolink.h"
+#include "subscript.h"
 #include "strikethrough.h"
 #include "table.h"
 #include "tagfilter.h"
@@ -9,6 +10,7 @@
 
 static int core_extensions_registration(cmark_plugin *plugin) {
   cmark_plugin_register_syntax_extension(plugin, create_table_extension());
+  cmark_plugin_register_syntax_extension(plugin, create_subscript_extension());
   cmark_plugin_register_syntax_extension(plugin,
                                          create_strikethrough_extension());
   cmark_plugin_register_syntax_extension(plugin, create_autolink_extension());
diff --git a/src/extensions/strikethrough.c b/src/extensions/strikethrough.c
index e088422..cae343d 100644
--- a/src/extensions/strikethrough.c
+++ b/src/extensions/strikethrough.c
@@ -138,7 +138,7 @@ static void html_render(cmark_syntax_extension *extension,
 static void plaintext_render(cmark_syntax_extension *extension,
                              cmark_renderer *renderer, cmark_node *node,
                              cmark_event_type ev_type, int options) {
-  renderer->out(renderer, node, "~", false, LITERAL);
+  renderer->out(renderer, node, "~~", false, LITERAL);
 }
 
 cmark_syntax_extension *create_strikethrough_extension(void) {
diff --git a/src/extensions/subscript.c b/src/extensions/subscript.c
new file mode 100644
index 0000000..bda1048
--- /dev/null
+++ b/src/extensions/subscript.c
@@ -0,0 +1,166 @@
+#include "subscript.h"
+#include <parser.h>
+#include <render.h>
+
+cmark_node_type CMARK_NODE_SUBSCRIPT;
+
+static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser,
+                         cmark_node *parent, unsigned char character,
+                         cmark_inline_parser *inline_parser) {
+  cmark_node *res = NULL;
+  int left_flanking, right_flanking, punct_before, punct_after, delims;
+  char buffer[101];
+
+  if (character != '~')
+    return NULL;
+
+  delims = cmark_inline_parser_scan_delimiters(
+      inline_parser, sizeof(buffer) - 1, '~',
+      &left_flanking,
+      &right_flanking, &punct_before, &punct_after);
+
+  memset(buffer, '~', delims);
+  buffer[delims] = 0;
+
+  res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
+  cmark_node_set_literal(res, buffer);
+  res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser);
+  res->start_column = cmark_inline_parser_get_column(inline_parser) - delims;
+
+  if ((left_flanking || right_flanking) && delims == 1) {
+    cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking,
+                                       right_flanking, res);
+  }
+
+  return res;
+}
+
+static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
+                         cmark_inline_parser *inline_parser, delimiter *opener,
+                         delimiter *closer) {
+  cmark_node *subscript;
+  cmark_node *tmp, *next;
+  delimiter *delim, *tmp_delim;
+  delimiter *res = closer->next;
+
+  subscript = opener->inl_text;
+
+  if (opener->inl_text->as.literal.len != closer->inl_text->as.literal.len)
+    goto done;
+
+  if (!cmark_node_set_type(subscript, CMARK_NODE_SUBSCRIPT))
+    goto done;
+
+  cmark_node_set_syntax_extension(subscript, self);
+
+  tmp = cmark_node_next(opener->inl_text);
+
+  while (tmp) {
+    if (tmp == closer->inl_text)
+      break;
+    next = cmark_node_next(tmp);
+    cmark_node_append_child(subscript, tmp);
+    tmp = next;
+  }
+
+  subscript->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1;
+  cmark_node_free(closer->inl_text);
+
+done:
+  delim = closer;
+  while (delim != NULL && delim != opener) {
+    tmp_delim = delim->previous;
+    cmark_inline_parser_remove_delimiter(inline_parser, delim);
+    delim = tmp_delim;
+  }
+
+  cmark_inline_parser_remove_delimiter(inline_parser, opener);
+
+  return res;
+}
+
+static const char *get_type_string(cmark_syntax_extension *extension,
+                                   cmark_node *node) {
+  return node->type == CMARK_NODE_SUBSCRIPT ? "subscript" : "<unknown>";
+}
+
+static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
+                       cmark_node_type child_type) {
+  if (node->type != CMARK_NODE_SUBSCRIPT)
+    return false;
+
+  return CMARK_NODE_TYPE_INLINE_P(child_type);
+}
+
+static void commonmark_render(cmark_syntax_extension *extension,
+                              cmark_renderer *renderer, cmark_node *node,
+                              cmark_event_type ev_type, int options) {
+  renderer->out(renderer, node, "~", false, LITERAL);
+}
+
+static void latex_render(cmark_syntax_extension *extension,
+                         cmark_renderer *renderer, cmark_node *node,
+                         cmark_event_type ev_type, int options) {
+  // \textsubscript is in the LaTeX kernel since 2015 (fixltx2e before that)
+  bool entering = (ev_type == CMARK_EVENT_ENTER);
+  if (entering) {
+    renderer->out(renderer, node, "\\textsubscript{", false, LITERAL);
+  } else {
+    renderer->out(renderer, node, "}", false, LITERAL);
+  }
+}
+
+static void man_render(cmark_syntax_extension *extension,
+                       cmark_renderer *renderer, cmark_node *node,
+                       cmark_event_type ev_type, int options) {
+  bool entering = (ev_type == CMARK_EVENT_ENTER);
+  if (entering) {
+    renderer->cr(renderer);
+    renderer->out(renderer, node, "\\d\\s-2", false, LITERAL);
+  } else {
+    renderer->out(renderer, node, "\\s+2\\u", false, LITERAL);
+    renderer->cr(renderer);
+  }
+}
+
+static void html_render(cmark_syntax_extension *extension,
+                        cmark_html_renderer *renderer, cmark_node *node,
+                        cmark_event_type ev_type, int options) {
+  bool entering = (ev_type == CMARK_EVENT_ENTER);
+  if (entering) {
+    cmark_strbuf_puts(renderer->html, "<sub>");
+  } else {
+    cmark_strbuf_puts(renderer->html, "</sub>");
+  }
+}
+
+static void plaintext_render(cmark_syntax_extension *extension,
+                             cmark_renderer *renderer, cmark_node *node,
+                             cmark_event_type ev_type, int options) {
+  renderer->out(renderer, node, "~", false, LITERAL);
+}
+
+cmark_syntax_extension *create_subscript_extension(void) {
+  cmark_syntax_extension *ext = cmark_syntax_extension_new("subscript");
+  cmark_llist *special_chars = NULL;
+
+  cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
+  cmark_syntax_extension_set_can_contain_func(ext, can_contain);
+  cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
+  cmark_syntax_extension_set_latex_render_func(ext, latex_render);
+  cmark_syntax_extension_set_man_render_func(ext, man_render);
+  cmark_syntax_extension_set_html_render_func(ext, html_render);
+  cmark_syntax_extension_set_plaintext_render_func(ext, plaintext_render);
+  CMARK_NODE_SUBSCRIPT = cmark_syntax_extension_add_node(1);
+
+  cmark_syntax_extension_set_match_inline_func(ext, match);
+  cmark_syntax_extension_set_inline_from_delim_func(ext, insert);
+
+  cmark_mem *mem = cmark_get_default_mem_allocator();
+  special_chars = cmark_llist_append(mem, special_chars, (void *)'~');
+  cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
+
+  cmark_syntax_extension_set_emphasis(ext, 1);
+
+  return ext;
+}
diff --git a/src/extensions/subscript.h b/src/extensions/subscript.h
new file mode 100644
index 0000000..3daa20a
--- /dev/null
+++ b/src/extensions/subscript.h
@@ -0,0 +1,9 @@
+#ifndef CMARK_GFM_SUBSCRIPT_H
+#define CMARK_GFM_SUBSCRIPT_H
+
+#include "cmark-gfm-core-extensions.h"
+
+extern cmark_node_type CMARK_NODE_SUBSCRIPT;
+cmark_syntax_extension *create_subscript_extension(void);
+
+#endif
diff --git a/src/extensions/table.c b/src/extensions/table.c
index e8359f2..1feb480 100644
--- a/src/extensions/table.c
+++ b/src/extensions/table.c
@@ -8,6 +8,7 @@
 
 #include "ext_scanners.h"
 #include "strikethrough.h"
+#include "subscript.h"
 #include "table.h"
 #include "cmark-gfm-core-extensions.h"
 
@@ -548,6 +549,7 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
            child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG ||
            child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE ||
            child_type == CMARK_NODE_STRIKETHROUGH ||
+           child_type == CMARK_NODE_SUBSCRIPT ||
            child_type == CMARK_NODE_HTML_INLINE ||
            child_type == CMARK_NODE_FOOTNOTE_REFERENCE;
   }
diff --git a/tests/testthat/test-extensions.R b/tests/testthat/test-extensions.R
index bcd27ef..396e212 100644
--- a/tests/testthat/test-extensions.R
+++ b/tests/testthat/test-extensions.R
@@ -1,7 +1,7 @@
 context("test-extensions")
 
 test_that("list extensions", {
-  expect_equal(list_extensions(), c("table", "strikethrough", "autolink", "tagfilter", "tasklist"))
+  expect_setequal(list_extensions(), c("table", "strikethrough", "subscript", "autolink", "tagfilter", "tasklist"))
 })
 
 test_that("strikethrough", {
@@ -23,6 +23,26 @@ test_that("strikethrough", {
 
 })
 
+test_that("subscript", {
+  md <- "H~2~O"
+  expect_equal(markdown_html(md), "<p>H~2~O</p>\n")
+  expect_equal(markdown_html(md, extensions = "subscript"), "<p>H<sub>2</sub>O</p>\n")
+
+  expect_equal(markdown_latex(md), "H\\textasciitilde{}2\\textasciitilde{}O\n")
+  expect_equal(markdown_latex(md, extensions = "subscript"), "H\\textsubscript{2}O\n")
+
+  expect_equal(markdown_man(md), ".PP\nH~2~O\n")
+  expect_equal(markdown_man(md, extensions = "subscript"), ".PP\nH\n\\d\\s-22\\s+2\\u\nO\n")
+
+  library(xml2)
+  doc1 <- xml_ns_strip(read_xml(markdown_xml(md)))
+  doc2 <- xml_ns_strip(read_xml(markdown_xml(md, extensions = "subscript")))
+  expect_length(xml_find_all(doc1, "//subscript"), 0)
+  expect_length(xml_find_all(doc2, "//subscript"), 1)
+
+})
+
+
 test_that("autolink", {
   md <- "Visit: https://www.test.com"
   expect_match(markdown_html(md, extensions = FALSE), "^((?!href).)*$", perl = TRUE)