From d2b116e36a3ca40dfd717a56d9cd1e6159ecba2b Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 27 Aug 2022 15:15:07 -0400 Subject: [PATCH] syntax: add 'std' feature In effect, this adds support for no_std by depending on only core and alloc. There is still currently some benefit to enabling std support, namely, getting the 'std::error::Error' trait impls for the various error types. (Although, it seems like the 'Error' trait is going to get moved to 'core' finally.) Otherwise, the only 'std' things we use are in tests for tweaking stack sizes. This is the first step in an effort to make 'regex' itself work without depending on 'std'. 'regex' itself will be more precarious since it uses things like HashMap and Mutex that we'll need to find a way around. Getting around HashMap is easy (just use BTreeMap), but figuring out how to synchronize the threadpool will be interesting. Ref #476, Ref #477 --- Cargo.toml | 3 +- regex-syntax/Cargo.toml | 3 +- regex-syntax/src/ast/mod.rs | 32 ++++++------ regex-syntax/src/ast/parse.rs | 57 ++++++++++++---------- regex-syntax/src/ast/print.rs | 14 ++++-- regex-syntax/src/ast/visitor.rs | 10 ++-- regex-syntax/src/error.rs | 19 +++++--- regex-syntax/src/hir/interval.rs | 8 ++- regex-syntax/src/hir/literal/mod.rs | 54 +++++++++++--------- regex-syntax/src/hir/mod.rs | 59 ++++++++++++---------- regex-syntax/src/hir/print.rs | 18 +++++-- regex-syntax/src/hir/translate.rs | 26 +++++----- regex-syntax/src/hir/visitor.rs | 2 + regex-syntax/src/lib.rs | 27 ++++++++-- regex-syntax/src/parser.rs | 5 +- regex-syntax/src/unicode.rs | 76 ++++++++++++++++------------- regex-syntax/src/utf8.rs | 11 ++--- regex-syntax/test | 1 + 18 files changed, 246 insertions(+), 179 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 04a192d12..df457afed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ finite automata and guarantees linear time matching on all inputs. categories = ["text-processing"] autotests = false exclude = ["/scripts/*", "/.github/*"] -edition = "2018" +edition = "2021" +resolver = "2" [workspace] members = [ diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index cad160d15..429c882c8 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -13,7 +13,8 @@ edition = "2021" # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-syntax/*/#crate-features [features] -default = ["unicode"] +default = ["std", "unicode"] +std = [] unicode = [ "unicode-age", diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 7c9dae7a0..7329fabbe 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -2,8 +2,9 @@ Defines an abstract syntax for regular expressions. */ -use std::cmp::Ordering; -use std::fmt; +use core::cmp::Ordering; + +use alloc::{boxed::Box, string::String, vec, vec::Vec}; pub use crate::ast::visitor::{visit, Visitor}; @@ -174,23 +175,24 @@ pub enum ErrorKind { UnsupportedLookAround, } +#[cfg(feature = "std")] impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { crate::error::Formatter::from(self).fmt(f) } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; match *self { CaptureLimitExceeded => write!( f, "exceeded the maximum number of \ capturing groups ({})", - ::std::u32::MAX + u32::MAX ), ClassEscapeInvalid => { write!(f, "invalid escape sequence found in character class") @@ -283,8 +285,8 @@ pub struct Span { pub end: Position, } -impl fmt::Debug for Span { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "Span({:?}, {:?})", self.start, self.end) } } @@ -316,8 +318,8 @@ pub struct Position { pub column: usize, } -impl fmt::Debug for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Position { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Position(o: {:?}, l: {:?}, c: {:?})", @@ -497,8 +499,8 @@ impl Ast { /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Ast`. -impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Ast { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use crate::ast::print::Printer; Printer::new().print(self, f) } @@ -1315,7 +1317,7 @@ pub enum Flag { /// space but heap space proportional to the depth of the `Ast`. impl Drop for Ast { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { Ast::Empty(_) @@ -1365,7 +1367,7 @@ impl Drop for Ast { /// stack space but heap space proportional to the depth of the `ClassSet`. impl Drop for ClassSet { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { ClassSet::Item(ref item) => match *item { diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 6e9c9aca0..f730ee659 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -2,17 +2,26 @@ This module provides a regular expression parser. */ -use std::borrow::Borrow; -use std::cell::{Cell, RefCell}; -use std::mem; -use std::result; - -use crate::ast::{self, Ast, Position, Span}; -use crate::either::Either; - -use crate::is_meta_character; - -type Result = result::Result; +use core::{ + borrow::Borrow, + cell::{Cell, RefCell}, + mem, +}; + +use alloc::{ + boxed::Box, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::{self, Ast, Position, Span}, + either::Either, + is_meta_character, +}; + +type Result = core::result::Result; /// A primitive is an expression with no sub-expressions. This includes /// literals, assertions and non-set character classes. This representation @@ -1533,9 +1542,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Assuming the preconditions are met, this routine can never fail. #[inline(never)] fn parse_octal(&self) -> ast::Literal { - use std::char; - use std::u32; - assert!(self.parser().octal); assert!('0' <= self.char() && self.char() <= '7'); let start = self.pos(); @@ -1600,9 +1606,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -1646,9 +1649,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -2146,7 +2146,7 @@ impl<'p, 's, P: Borrow> NestLimiter<'p, 's, P> { let new = self.depth.checked_add(1).ok_or_else(|| { self.p.error( span.clone(), - ast::ErrorKind::NestLimitExceeded(::std::u32::MAX), + ast::ErrorKind::NestLimitExceeded(u32::MAX), ) })?; let limit = self.p.parser().nest_limit; @@ -2297,11 +2297,14 @@ fn specialize_err( #[cfg(test)] mod tests { - use std::ops::Range; + use core::ops::Range; + + use alloc::format; - use super::{Parser, ParserBuilder, ParserI, Primitive}; use crate::ast::{self, Ast, Position, Span}; + use super::*; + // Our own assert_eq, which has slightly better formatting (but honestly // still kind of crappy). macro_rules! assert_eq { @@ -4272,7 +4275,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::Octal, - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4347,7 +4350,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X), - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4378,7 +4381,7 @@ bar #[test] fn parse_hex_four() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; @@ -4442,7 +4445,7 @@ bar #[test] fn parse_hex_eight() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index f6b2462c0..e6c000d57 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -2,10 +2,13 @@ This module provides a regular expression printer for `Ast`. */ -use std::fmt; +use core::fmt; -use crate::ast::visitor::{self, Visitor}; -use crate::ast::{self, Ast}; +use crate::ast::{ + self, + visitor::{self, Visitor}, + Ast, +}; /// A builder for constructing a printer. /// @@ -395,9 +398,12 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::string::String; + use crate::ast::parse::ParserBuilder; + use super::*; + fn roundtrip(given: &str) { roundtrip_with(|b| b, given); } diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 78ee487cf..03f8bf963 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -1,4 +1,4 @@ -use std::fmt; +use alloc::{vec, vec::Vec}; use crate::ast::{self, Ast}; @@ -475,8 +475,8 @@ impl<'a> ClassInduct<'a> { } } -impl<'a> fmt::Debug for ClassFrame<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassFrame<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassFrame::Union { .. } => "Union", ClassFrame::Binary { .. } => "Binary", @@ -487,8 +487,8 @@ impl<'a> fmt::Debug for ClassFrame<'a> { } } -impl<'a> fmt::Debug for ClassInduct<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassInduct<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassInduct::Item(it) => match *it { ast::ClassSetItem::Empty(_) => "Item(Empty)", diff --git a/regex-syntax/src/error.rs b/regex-syntax/src/error.rs index 6e7fa7466..a10230a87 100644 --- a/regex-syntax/src/error.rs +++ b/regex-syntax/src/error.rs @@ -1,9 +1,13 @@ -use std::cmp; -use std::fmt; -use std::result; +use core::{cmp, fmt, result}; -use crate::ast; -use crate::hir; +use alloc::{ + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ast, hir}; /// A type alias for dealing with errors returned by this crate. pub type Result = result::Result; @@ -35,6 +39,7 @@ impl From for Error { } } +#[cfg(feature = "std")] impl std::error::Error for Error {} impl fmt::Display for Error { @@ -266,11 +271,13 @@ impl<'p> Spans<'p> { } fn repeat_char(c: char, count: usize) -> String { - ::std::iter::repeat(c).take(count).collect() + core::iter::repeat(c).take(count).collect() } #[cfg(test)] mod tests { + use alloc::string::ToString; + use crate::ast::parse::Parser; fn assert_panic_message(pattern: &str, expected_msg: &str) { diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index d6e83f7b2..fbe772ea4 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -1,8 +1,6 @@ -use std::char; -use std::cmp; -use std::fmt::Debug; -use std::slice; -use std::u8; +use core::{char, cmp, fmt::Debug, slice}; + +use alloc::vec::Vec; use crate::unicode; diff --git a/regex-syntax/src/hir/literal/mod.rs b/regex-syntax/src/hir/literal/mod.rs index 58b8871ed..d49cffd92 100644 --- a/regex-syntax/src/hir/literal/mod.rs +++ b/regex-syntax/src/hir/literal/mod.rs @@ -2,11 +2,15 @@ Provides routines for extracting literal prefixes and suffixes from an `Hir`. */ -use std::cmp; -use std::fmt; -use std::iter; -use std::mem; -use std::ops; +use core::{cmp, iter, mem, ops}; + +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; use crate::hir::{self, Hir, HirKind}; @@ -408,7 +412,7 @@ impl Literals { } if self.lits.is_empty() { let i = cmp::min(self.limit_size, bytes.len()); - self.lits.push(Literal::new(bytes[..i].to_owned())); + self.lits.push(Literal::new(bytes[..i].to_vec())); self.lits[0].cut = i < bytes.len(); return !self.lits[0].is_cut(); } @@ -465,8 +469,6 @@ impl Literals { cls: &hir::ClassUnicode, reverse: bool, ) -> bool { - use std::char; - if self.class_exceeds_limits(cls_char_count(cls)) { return false; } @@ -837,8 +839,8 @@ fn alternate_literals( } } -impl fmt::Debug for Literals { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Literals { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("Literals") .field("lits", &self.lits) .field("limit_size", &self.limit_size) @@ -881,8 +883,8 @@ impl PartialOrd for Literal { } } -impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Literal { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { if self.is_cut() { write!(f, "Cut({})", escape_unicode(&self.v)) } else { @@ -923,7 +925,7 @@ fn position(needle: &[u8], mut haystack: &[u8]) -> Option { } fn escape_unicode(bytes: &[u8]) -> String { - let show = match ::std::str::from_utf8(bytes) { + let show = match core::str::from_utf8(bytes) { Ok(v) => v.to_string(), Err(_) => escape_bytes(bytes), }; @@ -955,7 +957,7 @@ fn escape_bytes(bytes: &[u8]) -> String { } fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; + use core::ascii::escape_default; let escaped: Vec = escape_default(byte).collect(); String::from_utf8_lossy(&escaped).into_owned() @@ -971,11 +973,15 @@ fn cls_byte_count(cls: &hir::ClassBytes) -> usize { #[cfg(test)] mod tests { - use std::fmt; + use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, + }; + + use crate::{hir::Hir, ParserBuilder}; - use super::{escape_bytes, Literal, Literals}; - use crate::hir::Hir; - use crate::ParserBuilder; + use super::*; // To make test failures easier to read. #[derive(Debug, Eq, PartialEq)] @@ -1013,8 +1019,8 @@ mod tests { } } - impl fmt::Debug for ULiteral { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + impl core::fmt::Debug for ULiteral { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { if self.is_cut() { write!(f, "Cut({})", self.v) } else { @@ -1037,11 +1043,11 @@ mod tests { #[allow(non_snake_case)] fn C(s: &'static str) -> ULiteral { - ULiteral { v: s.to_owned(), cut: true } + ULiteral { v: s.to_string(), cut: true } } #[allow(non_snake_case)] fn M(s: &'static str) -> ULiteral { - ULiteral { v: s.to_owned(), cut: false } + ULiteral { v: s.to_string(), cut: false } } fn prefixes(lits: &mut Literals, expr: &Hir) { @@ -1626,7 +1632,7 @@ mod tests { let given: Vec = $given .into_iter() .map(|s: &str| Literal { - v: s.to_owned().into_bytes(), + v: s.to_string().into_bytes(), cut: false, }) .collect(); @@ -1661,7 +1667,7 @@ mod tests { let given: Vec = $given .into_iter() .map(|s: &str| Literal { - v: s.to_owned().into_bytes(), + v: s.to_string().into_bytes(), cut: false, }) .collect(); diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index b16df20c8..2af769e92 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -1,18 +1,27 @@ /*! Defines a high-level intermediate representation for regular expressions. */ -use std::char; -use std::cmp; -use std::fmt; -use std::result; -use std::u8; -use crate::ast::Span; -use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter}; -use crate::unicode; +use core::{char, cmp}; -pub use crate::hir::visitor::{visit, Visitor}; -pub use crate::unicode::CaseFoldError; +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::Span, + hir::interval::{Interval, IntervalSet, IntervalSetIter}, + unicode, +}; + +pub use crate::{ + hir::visitor::{visit, Visitor}, + unicode::CaseFoldError, +}; mod interval; pub mod literal; @@ -80,16 +89,17 @@ pub enum ErrorKind { UnicodeCaseUnavailable, } +#[cfg(feature = "std")] impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { crate::error::Formatter::from(self).fmt(f) } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; let msg = match *self { @@ -197,8 +207,7 @@ impl Hir { /// Consumes ownership of this HIR expression and returns its underlying /// `HirKind`. pub fn into_kind(mut self) -> HirKind { - use std::mem; - mem::replace(&mut self.kind, HirKind::Empty) + core::mem::replace(&mut self.kind, HirKind::Empty) } /// Returns an empty HIR expression. @@ -704,8 +713,8 @@ impl HirKind { /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Hir`. -impl fmt::Display for Hir { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Hir { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use crate::hir::print::Printer; Printer::new().print(self, f) } @@ -800,7 +809,7 @@ impl Class { /// Unicode oriented. pub fn try_case_fold_simple( &mut self, - ) -> result::Result<(), CaseFoldError> { + ) -> core::result::Result<(), CaseFoldError> { match *self { Class::Unicode(ref mut x) => x.try_case_fold_simple()?, Class::Bytes(ref mut x) => x.case_fold_simple(), @@ -909,7 +918,7 @@ impl ClassUnicode { /// `unicode-case` feature is not enabled. pub fn try_case_fold_simple( &mut self, - ) -> result::Result<(), CaseFoldError> { + ) -> core::result::Result<(), CaseFoldError> { self.set.case_fold_simple() } @@ -981,8 +990,8 @@ pub struct ClassUnicodeRange { end: char, } -impl fmt::Debug for ClassUnicodeRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for ClassUnicodeRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let start = if !self.start.is_whitespace() && !self.start.is_control() { self.start.to_string() @@ -1285,8 +1294,8 @@ impl ClassBytesRange { } } -impl fmt::Debug for ClassBytesRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for ClassBytesRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut debug = f.debug_struct("ClassBytesRange"); if self.start <= 0x7F { let ch = char::try_from(self.start).unwrap(); @@ -1459,7 +1468,7 @@ pub enum RepetitionRange { /// space but heap space proportional to the depth of the total `Hir`. impl Drop for Hir { fn drop(&mut self) { - use std::mem; + use core::mem; match *self.kind() { HirKind::Empty diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index 433f9bf11..63d014b1b 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -2,11 +2,16 @@ This module provides a regular expression printer for `Hir`. */ -use std::fmt; +use core::fmt; -use crate::hir::visitor::{self, Visitor}; -use crate::hir::{self, Hir, HirKind}; -use crate::is_meta_character; +use crate::{ + hir::{ + self, + visitor::{self, Visitor}, + Hir, HirKind, + }, + is_meta_character, +}; /// A builder for constructing a printer. /// @@ -235,9 +240,12 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::string::String; + use crate::ParserBuilder; + use super::*; + fn roundtrip(given: &str, expected: &str) { roundtrip_with(|b| b, given, expected); } diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 988384ede..b4338bc94 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -2,14 +2,17 @@ Defines a translator that converts an `Ast` to an `Hir`. */ -use std::cell::{Cell, RefCell}; -use std::result; +use core::cell::{Cell, RefCell}; -use crate::ast::{self, Ast, Span, Visitor}; -use crate::hir::{self, Error, ErrorKind, Hir}; -use crate::unicode::{self, ClassQuery}; +use alloc::{boxed::Box, string::ToString, vec, vec::Vec}; -type Result = result::Result; +use crate::{ + ast::{self, Ast, Span, Visitor}, + hir::{self, Error, ErrorKind, Hir}, + unicode::{self, ClassQuery}, +}; + +type Result = core::result::Result; /// A builder for constructing an AST->HIR translator. #[derive(Clone, Debug)] @@ -1119,12 +1122,13 @@ fn ascii_class_as_chars( #[cfg(test)] mod tests { - use crate::ast::parse::ParserBuilder; - use crate::ast::{self, Ast, Position, Span}; - use crate::hir::{self, Hir, HirKind}; - use crate::unicode::{self, ClassQuery}; + use crate::{ + ast::{self, parse::ParserBuilder, Ast, Position, Span}, + hir::{self, Hir, HirKind}, + unicode::{self, ClassQuery}, + }; - use super::{ascii_class, ascii_class_as_chars, TranslatorBuilder}; + use super::*; // We create these errors to compare with real hir::Errors in the tests. // We define equality between TestError and hir::Error to disregard the diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs index 4f5a70909..97771d92f 100644 --- a/regex-syntax/src/hir/visitor.rs +++ b/regex-syntax/src/hir/visitor.rs @@ -1,3 +1,5 @@ +use alloc::{vec, vec::Vec}; + use crate::hir::{self, Hir, HirKind}; /// A trait for visiting the high-level IR (HIR) in depth first order. diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs index 1dfb38af3..287b3417c 100644 --- a/regex-syntax/src/lib.rs +++ b/regex-syntax/src/lib.rs @@ -116,6 +116,11 @@ match semantics of a regular expression. The following features are available: +* **std** - + Enables support for the standard library. This feature is enabled by default. + When disabled, only `core` and `alloc` are used. Otherwise, enabling `std` + generally just enables `std::error::Error` trait impls for the various error + types. * **unicode** - Enables all Unicode features. This feature is enabled by default, and will always cover all Unicode features, even if more are added in the future. @@ -154,13 +159,23 @@ The following features are available: `\p{sb=ATerm}`. */ +#![forbid(unsafe_code)] #![deny(missing_docs)] #![warn(missing_debug_implementations)] -#![forbid(unsafe_code)] +#![no_std] + +#[cfg(any(test, feature = "std"))] +extern crate std; -pub use crate::error::{Error, Result}; -pub use crate::parser::{Parser, ParserBuilder}; -pub use crate::unicode::UnicodeWordError; +extern crate alloc; + +pub use crate::{ + error::{Error, Result}, + parser::{Parser, ParserBuilder}, + unicode::UnicodeWordError, +}; + +use alloc::string::String; pub mod ast; mod either; @@ -248,7 +263,7 @@ pub fn is_word_character(c: char) -> bool { /// returns an error. pub fn try_is_word_character( c: char, -) -> std::result::Result { +) -> core::result::Result { unicode::is_word_character(c) } @@ -265,6 +280,8 @@ pub fn is_word_byte(c: u8) -> bool { #[cfg(test)] mod tests { + use alloc::string::ToString; + use super::*; #[test] diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index ded95b280..93df72279 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -1,7 +1,4 @@ -use crate::ast; -use crate::hir; - -use crate::Result; +use crate::{ast, hir, Result}; /// A builder for a regular expression parser. /// diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 0b716f5e6..1689681fa 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -1,11 +1,12 @@ -use std::error; -use std::fmt; -use std::result; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use crate::hir; /// A type alias for errors specific to Unicode handling of classes. -pub type Result = result::Result; +pub type Result = core::result::Result; /// An inclusive range of codepoints from a generated file (hence the static /// lifetime). @@ -25,7 +26,7 @@ pub enum Error { } /// A type alias for errors specific to Unicode case folding. -pub type FoldResult = result::Result; +pub type FoldResult = core::result::Result; /// An error that occurs when Unicode-aware simple case folding fails. /// @@ -35,10 +36,11 @@ pub type FoldResult = result::Result; #[derive(Debug)] pub struct CaseFoldError(()); -impl error::Error for CaseFoldError {} +#[cfg(feature = "std")] +impl std::error::Error for CaseFoldError {} -impl fmt::Display for CaseFoldError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for CaseFoldError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware case folding is not available \ @@ -55,10 +57,11 @@ impl fmt::Display for CaseFoldError { #[derive(Debug)] pub struct UnicodeWordError(()); -impl error::Error for UnicodeWordError {} +#[cfg(feature = "std")] +impl std::error::Error for UnicodeWordError {} -impl fmt::Display for UnicodeWordError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for UnicodeWordError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware \\w class is not available \ @@ -80,21 +83,24 @@ impl fmt::Display for UnicodeWordError { /// This returns an error if the Unicode case folding tables are not available. pub fn simple_fold( c: char, -) -> FoldResult, Option>> { +) -> FoldResult, Option>> +{ #[cfg(not(feature = "unicode-case"))] fn imp( _: char, - ) -> FoldResult, Option>> - { - use std::option::IntoIter; - Err::, _>, _>(CaseFoldError(())) + ) -> FoldResult< + core::result::Result, Option>, + > { + use core::option::IntoIter; + Err::, _>, _>(CaseFoldError(())) } #[cfg(feature = "unicode-case")] fn imp( c: char, - ) -> FoldResult, Option>> - { + ) -> FoldResult< + core::result::Result, Option>, + > { use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; Ok(CASE_FOLDING_SIMPLE @@ -130,8 +136,9 @@ pub fn contains_simple_case_mapping( #[cfg(feature = "unicode-case")] fn imp(start: char, end: char) -> FoldResult { + use core::cmp::Ordering; + use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; - use std::cmp::Ordering; assert!(start <= end); Ok(CASE_FOLDING_SIMPLE @@ -407,17 +414,17 @@ pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode { /// Returns true only if the given codepoint is in the `\w` character class. /// /// If the `unicode-perl` feature is not enabled, then this returns an error. -pub fn is_word_character(c: char) -> result::Result { +pub fn is_word_character( + c: char, +) -> core::result::Result { #[cfg(not(feature = "unicode-perl"))] - fn imp(_: char) -> result::Result { + fn imp(_: char) -> core::result::Result { Err(UnicodeWordError(())) } #[cfg(feature = "unicode-perl")] - fn imp(c: char) -> result::Result { - use crate::is_word_byte; - use crate::unicode_tables::perl_word::PERL_WORD; - use std::cmp::Ordering; + fn imp(c: char) -> core::result::Result { + use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD}; // MSRV(1.59): Use 'u8::try_from(c)' instead. if u8::try_from(u32::from(c)).map_or(false, is_word_byte) { @@ -425,6 +432,8 @@ pub fn is_word_character(c: char) -> result::Result { } Ok(PERL_WORD .binary_search_by(|&(start, end)| { + use core::cmp::Ordering; + if start <= c && c <= end { Ordering::Equal } else if start > c { @@ -583,7 +592,7 @@ fn property_set( fn ages(canonical_age: &str) -> Result> { #[cfg(not(feature = "unicode-age"))] fn imp(_: &str) -> Result> { - use std::option::IntoIter; + use core::option::IntoIter; Err::, _>(Error::PropertyNotFound) } @@ -884,10 +893,7 @@ fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] { #[cfg(test)] mod tests { - use super::{ - contains_simple_case_mapping, simple_fold, symbolic_name_normalize, - symbolic_name_normalize_bytes, - }; + use super::*; #[cfg(feature = "unicode-case")] fn simple_fold_ok(c: char) -> impl Iterator { @@ -911,23 +917,23 @@ mod tests { #[cfg(feature = "unicode-case")] fn simple_fold_k() { let xs: Vec = simple_fold_ok('k').collect(); - assert_eq!(xs, vec!['K', 'K']); + assert_eq!(xs, alloc::vec!['K', 'K']); let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['k', 'K']); + assert_eq!(xs, alloc::vec!['k', 'K']); let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['K', 'k']); + assert_eq!(xs, alloc::vec!['K', 'k']); } #[test] #[cfg(feature = "unicode-case")] fn simple_fold_a() { let xs: Vec = simple_fold_ok('a').collect(); - assert_eq!(xs, vec!['A']); + assert_eq!(xs, alloc::vec!['A']); let xs: Vec = simple_fold_ok('A').collect(); - assert_eq!(xs, vec!['a']); + assert_eq!(xs, alloc::vec!['a']); } #[test] diff --git a/regex-syntax/src/utf8.rs b/regex-syntax/src/utf8.rs index b00cd7dba..a75a8afa8 100644 --- a/regex-syntax/src/utf8.rs +++ b/regex-syntax/src/utf8.rs @@ -80,12 +80,9 @@ I also got the idea from which uses it for executing automata on their term index. */ -#![deny(missing_docs)] +use core::{char, fmt, iter::FusedIterator, slice}; -use std::char; -use std::fmt; -use std::iter::FusedIterator; -use std::slice; +use alloc::{vec, vec::Vec}; const MAX_UTF8_BYTES: usize = 4; @@ -457,7 +454,9 @@ fn max_scalar_value(nbytes: usize) -> u32 { #[cfg(test)] mod tests { - use std::char; + use core::char; + + use alloc::{vec, vec::Vec}; use crate::utf8::{Utf8Range, Utf8Sequences}; diff --git a/regex-syntax/test b/regex-syntax/test index 4b1b9fb1a..d03db94b4 100755 --- a/regex-syntax/test +++ b/regex-syntax/test @@ -7,6 +7,7 @@ echo "===== DEFAULT FEATURES ===" cargo test features=( + std unicode unicode-age unicode-bool