markdown-it-rust · rlidwka · Jun 5, 2023 · Jun 6, 2023 · Jun 6, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,38 @@
 # Changelog
 
+## 0.6.0 - WIP
+
+### Added
+
+ - added `md.try_parse()` function which may return an error, as opposed to the existing
+   `md.parse()` function which never does
+
+ - added optional `try_run()` trait function for rules which can fail and will
+   propagate errors when using `md.try_parse()`
+
+### Changed
+
+ - `Node::walk_*` methods now return `Result`, which allows you to terminate traversal early
+ - `syntect` rule now trims spaces in fence info string
+
+### Migration
+
+For all `Node::walk_*` methods change the following:
+
+```rust
+// replace this:
+node.walk(|node, _| {
+    dbg!(node);
+});
+
+// with this (unwrap is safe here because walk only
+// returns an error when your closure does):
+node.walk(|node, _| {
+    dbg!(node);
+    Ok(())
+}).unwrap();
+```
+
 ## 0.5.0 - 2023-05-13
 
 ### Added

diff --git a/Cargo.toml b/Cargo.toml
@@ -28,13 +28,15 @@ default = ["linkify", "syntect"]
 # Lower range limit of these dependencies was manually checked to work with
 # minimal versions possible, higher range limit is best guess based on semver.
 # So older versions will not work, but newer versions might.
+anyhow       = ">= 1.0.18, < 2"
 argparse     = ">= 0.2.1, < 0.3"
 const_format = ">= 0.1.0, < 0.3"
 derivative   = ">= 1.0.2, < 3"
 derive_more  = ">= 0.99.0, < 1"
 downcast-rs  = ">= 1.0.2, < 2"
 entities     = ">= 0.1.0, < 2"
 html-escape  = ">= 0.1.0, < 0.3"
+indoc        = ">= 0.3.4, < 3"
 linkify      = { version = ">= 0.5.0, < 0.10", optional = true }
 mdurl        = ">= 0.3.1, < 0.4"
 once_cell    = ">= 1.0.1, < 2"

diff --git a/examples/error_handling/README.md b/examples/error_handling/README.md
@@ -0,0 +1 @@
+This is an example of error handling and error propagation in custom markdown-it rules.
diff --git a/examples/error_handling/main.rs b/examples/error_handling/main.rs
@@ -0,0 +1,103 @@
+use markdown_it::parser::block::{BlockRule, BlockState};
+use markdown_it::parser::core::CoreRule;
+use markdown_it::parser::inline::{InlineRule, InlineState};
+use markdown_it::{MarkdownIt, Node, Result};
+use std::error::Error;
+use std::fmt::Display;
+
+#[derive(Debug)]
+struct MyError(&'static str);
+
+impl Display for MyError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.0)
+    }
+}
+
+impl Error for MyError {}
+
+struct FallibleInlineRule;
+
+impl InlineRule for FallibleInlineRule {
+    const MARKER: char = '@';
+
+    // Example of a fallible rule: it returns an error whenever the input at the current position starts with `@`.
+    fn try_run(state: &mut InlineState) -> Result<Option<(Node, usize)>> {
+        // not our marker: report "no match" instead of an error
+        if !state.src[state.pos..].starts_with(Self::MARKER) { return Ok(None); };
+
+        Err(MyError("AAA").into())
+    }
+
+    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
+        Self::try_run(state).unwrap_or_default() // infallible variant: an error is turned into `None` (no match)
+    }
+}
+
+struct FallibleBlockRule;
+
+impl BlockRule for FallibleBlockRule {
+    // Example of a fallible rule: it returns an error whenever the line at `state.line` starts with `@@@`.
+    fn try_run(state: &mut BlockState) -> Result<Option<(Node, usize)>> {
+        if !state.get_line(state.line).starts_with("@@@") { return Ok(None); }; // not our syntax: no match, no error
+
+        Err(MyError("BBB").into())
+    }
+
+    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
+        Self::try_run(state).unwrap_or_default() // infallible variant: an error is turned into `None` (no match)
+    }
+}
+
+struct FallibleCoreRule;
+
+impl CoreRule for FallibleCoreRule {
+    fn try_run(_root: &mut Node, _md: &MarkdownIt) -> Result<()> { // example of a fallible rule: fails unconditionally
+        Err(MyError("CCC").into())
+    }
+
+    fn run(root: &mut Node, md: &MarkdownIt) {
+        let _ = Self::try_run(root, md); // infallible variant: the error is deliberately discarded
+    }
+}
+
+fn main() {
+    let md = &mut markdown_it::MarkdownIt::new();
+    markdown_it::plugins::cmark::add(md);
+
+    md.inline.add_rule::<FallibleInlineRule>();
+    md.block.add_rule::<FallibleBlockRule>();
+    md.add_rule::<FallibleCoreRule>().after_all();
+
+    // the inline rule fails on `@`; the rule's own error is reachable through `err.source()`
+    let text1 = r#"*hello @world*"#;
+    let err = md.try_parse(text1).err().unwrap();
+    println!("{err}");
+    assert_eq!(err.source().unwrap().to_string(), "AAA");
+
+    // the block rule fails on a line that starts with `@@@`
+    let text2 = r#"@@@ *hello*"#;
+    let err = md.try_parse(text2).err().unwrap();
+    println!("{err}");
+    assert_eq!(err.source().unwrap().to_string(), "BBB");
+
+    // the core rule fails for any input, even one without custom syntax
+    let text3 = r#"*hello*"#;
+    let err = md.try_parse(text3).err().unwrap();
+    println!("{err}");
+    assert_eq!(err.source().unwrap().to_string(), "CCC");
+
+    // If you call parse() instead of try_parse(), failing rules are skipped.
+    // As a result, the custom syntax is left exactly as the user wrote it (not parsed).
+    let html = md.parse(text1).render();
+    print!("{html}");
+    assert_eq!(html, "<p><em>hello @world</em></p>\n");
+
+    let html = md.parse(text2).render();
+    print!("{html}");
+    assert_eq!(html, "<p>@@@ <em>hello</em></p>\n");
+
+    let html = md.parse(text3).render();
+    print!("{html}");
+    assert_eq!(html, "<p><em>hello</em></p>\n");
+}
diff --git a/examples/ferris/core_rule.rs b/examples/ferris/core_rule.rs
@@ -51,7 +51,8 @@ impl CoreRule for FerrisCounterRule {
             if node.is::<InlineFerris>() || node.is::<BlockFerris>() {
                 counter += 1;
             }
-        });
+            Ok(())
+        }).unwrap();
 
         // append a counter to the root as a custom node
         root.children.push(Node::new(FerrisCounter(counter)))

diff --git a/src/bin.rs b/src/bin.rs
@@ -93,7 +93,8 @@ fn main() {
             } else {
                 println!("{name}");
             }
-        });
+            Ok(())
+        }).unwrap();
         return;
     }
 

diff --git a/src/common/ruler.rs b/src/common/ruler.rs
@@ -1,11 +1,9 @@
 //! Plugin manager with dependency resolution.
 
 use derivative::Derivative;
-use once_cell::sync::OnceCell;
 use std::collections::{HashMap, HashSet};
 use std::fmt::Debug;
 use std::hash::Hash;
-use std::slice::Iter;
 
 ///
 /// Ruler allows you to implement a plugin system with dependency management and ensure that
@@ -35,7 +33,7 @@ use std::slice::Iter;
 ///
 /// // now we run this chain
 /// let mut result = String::new();
-/// for f in chain.iter() { f(&mut result); }
+/// for f in chain.compile() { f(&mut result); }
 /// assert_eq!(result, "[ hello, world! ]");
 /// ```
 ///
@@ -50,7 +48,6 @@ use std::slice::Iter;
 ///
 pub struct Ruler<M, T> {
     deps: Vec<RuleItem<M, T>>,
-    compiled: OnceCell<(Vec<usize>, Vec<T>)>,
 }
 
 impl<M, T> Ruler<M, T> {
@@ -62,7 +59,6 @@ impl<M, T> Ruler<M, T> {
 impl<M: Eq + Hash + Copy + Debug, T: Clone> Ruler<M, T> {
     /// Add a new rule identified by `mark` with payload `value`.
     pub fn add(&mut self, mark: M, value: T) -> &mut RuleItem<M, T> {
-        self.compiled = OnceCell::new();
         let dep = RuleItem::new(mark, value);
         self.deps.push(dep);
         self.deps.last_mut().unwrap()
@@ -74,17 +70,12 @@ impl<M: Eq + Hash + Copy + Debug, T: Clone> Ruler<M, T> {
     }
 
     /// Check if there are any rules identified by `mark`.
-    pub fn contains(&mut self, mark: M) -> bool {
+    pub fn contains(&self, mark: M) -> bool {
         self.deps.iter().any(|dep| dep.marks.contains(&mark))
     }
 
-    /// Ordered iteration through rules.
-    #[inline]
-    pub fn iter(&self) -> Iter<T> {
-        self.compiled.get_or_init(|| self.compile()).1.iter()
-    }
-
-    fn compile(&self) -> (Vec<usize>, Vec<T>) {
+    /// Convert dependency tree into an ordered list.
+    pub fn compile(&self) -> Vec<T> {
         // ID -> [RuleItem index]
         let mut idhash = HashMap::<M, Vec<usize>>::new();
 
@@ -206,20 +197,15 @@ impl<M: Eq + Hash + Copy + Debug, T: Clone> Ruler<M, T> {
             panic!("cyclic dependency: (use debug mode for more details)");
         }
 
-        (result_idx, result)
+        //(result_idx, result)
+        result
     }
 }
 
 impl<M: Eq + Hash + Copy + Debug, T: Clone> Debug for Ruler<M, T> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let vec: Vec<(usize, M)> = self.compiled.get_or_init(|| self.compile()).0
-                                    .iter()
-                                    .map(|idx| (*idx, *self.deps.get(*idx).unwrap().marks.get(0).unwrap()))
-                                    .collect();
-
         f.debug_struct("Ruler")
             .field("deps", &self.deps)
-            .field("compiled", &vec)
             .finish()
     }
 }
@@ -228,7 +214,6 @@ impl<M, T> Default for Ruler<M, T> {
     fn default() -> Self {
         Self {
             deps: Vec::new(),
-            compiled: OnceCell::new(),
         }
     }
 }
@@ -267,7 +252,7 @@ impl<M: Copy, T> RuleItem<M, T> {
     /// chain.add("b", |s| s.push_str("foo")).before("a");
     ///
     /// let mut result = String::new();
-    /// for f in chain.iter() { f(&mut result); }
+    /// for f in chain.compile() { f(&mut result); }
     /// assert_eq!(result, "foobar");
     /// ```
     pub fn before(&mut self, mark: M) -> &mut Self {
@@ -293,7 +278,7 @@ impl<M: Copy, T> RuleItem<M, T> {
     /// chain.add("b", |s| s.push_str("B")).after("a").before_all();
     ///
     /// let mut result = String::new();
-    /// for f in chain.iter() { f(&mut result); }
+    /// for f in chain.compile() { f(&mut result); }
     /// // without before_all order will be ACB
     /// assert_eq!(result, "ABC");
     /// ```
@@ -321,7 +306,7 @@ impl<M: Copy, T> RuleItem<M, T> {
     /// chain.add("a", |s| s.push_str("A")).before("BorC");
     ///
     /// let mut result = String::new();
-    /// for f in chain.iter() { f(&mut result); }
+    /// for f in chain.compile() { f(&mut result); }
     /// assert_eq!(result, "ABC");
     /// ```
     pub fn alias(&mut self, mark: M) -> &mut Self {

diff --git a/src/common/typekey.rs b/src/common/typekey.rs
@@ -35,6 +35,11 @@ impl TypeKey {
     pub fn of<T: ?Sized + 'static>() -> Self {
         Self { id: TypeId::of::<T>(), name: any::type_name::<T>() }
     }
+
+    #[must_use]
+    pub fn short_name(&self) -> &str { // returns the part of `name` after its last `::` (all of `name` if there is none)
+        &self.name[self.name.rfind("::").map(|p| p + 2).unwrap_or(0)..] // NOTE(review): a generic name like `Vec<a::B>` would yield `B>` — confirm callers never pass generic types
+    }
 }
 
 impl Hash for TypeKey {

diff --git a/src/generics/inline/emph_pair.rs b/src/generics/inline/emph_pair.rs
@@ -265,7 +265,10 @@ fn is_odd_match(opener: &EmphMarker, closer: &EmphMarker) -> bool {
 pub struct FragmentsJoin;
 impl CoreRule for FragmentsJoin {
     fn run(node: &mut Node, _: &MarkdownIt) {
-        node.walk_mut(|node, _| fragments_join(node));
+        node.walk_mut(|node, _| {
+            fragments_join(node);
+            Ok(())
+        }).unwrap();
     }
 }
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -18,6 +18,9 @@
 // just a style choice that clippy has no business complaining about
 #![allow(clippy::uninlined_format_args)]
 
+// re-export for use in the try_parse APIs
+pub use anyhow::Result;
+
 pub mod common;
 pub mod examples;
 pub mod generics;

diff --git a/src/parser/block/builtin/block_parser.rs b/src/parser/block/builtin/block_parser.rs
@@ -1,5 +1,6 @@
 use crate::parser::core::{CoreRule, Root};
-use crate::{MarkdownIt, Node};
+use crate::parser::main::RootNodeWrongType;
+use crate::{MarkdownIt, Node, Result};
 
 pub fn add(md: &mut MarkdownIt) {
     md.add_rule::<BlockParserRule>()
@@ -8,16 +9,39 @@ pub fn add(md: &mut MarkdownIt) {
 
 pub struct BlockParserRule;
 impl CoreRule for BlockParserRule {
+    fn try_run(root: &mut Node, md: &MarkdownIt) -> Result<()> {
+        Self::_run::<true>(root, md)?;
+        Ok(())
+    }
+
     fn run(root: &mut Node, md: &MarkdownIt) {
+        let _ = Self::_run::<false>(root, md);
+    }
+}
+
+impl BlockParserRule {
+    fn _run<const CAN_FAIL: bool>(root: &mut Node, md: &MarkdownIt) -> Result<()> { // shared body of run()/try_run(); CAN_FAIL selects try_parse (errors propagate) over parse
         let mut node = std::mem::take(root);
-        let data = node.cast_mut::<Root>().unwrap();
+        let Some(data) = node.cast_mut::<Root>() else {
+            *root = node; return Err(RootNodeWrongType.into()); // undo the take above so `root` is not left empty on a type mismatch
+        };
         let source = std::mem::take(&mut data.content);
         let mut ext = std::mem::take(&mut data.ext);
 
-        node = md.block.parse(source.as_str(), node, md, &mut ext);
-        let data = node.cast_mut::<Root>().unwrap();
+        md.block.compile();
+        node = if CAN_FAIL {
+            md.block.try_parse(source.as_str(), node, md, &mut ext)?
+        } else {
+            md.block.parse(source.as_str(), node, md, &mut ext)
+        };
+        *root = node; // store the parsed tree first, so it is kept even if the check below fails
+
+        let Some(data) = root.cast_mut::<Root>() else {
+            return Err(RootNodeWrongType.into());
+        };
         data.content = source;
         data.ext = ext;
-        *root = node;
+
+        Ok(())
     }
 }