Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nominal no_std + alloc support to regex-syntax #477

Closed
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions regex-syntax/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,12 @@ homepage = "https://github.com/rust-lang/regex"
description = "A regular expression parser."
workspace = ".."

[features]
default = ["std"]
# Disable this on-by-default feature and add "alloc" to allow use in no_std builds
std = []
# Required for use in no_std builds, presently nightly-only
alloc = []
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need an alloc feature flag if it will always be required — i.e., unless there are plans for core-only support in the future, you could just drop this feature flag and use #![cfg_attr(not(feature = "std"), feature(alloc))].

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to leave the door open for that possibility. We can simplify the cfg statements if those plans don't materialize.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A trick we use in Rand: make the std feature depend on alloc (i.e. std = ["alloc"] in Cargo.toml). This has the advantage that you can use just #[cfg(feature = "alloc")] to feature-gate modules/items requiring an allocator, though it also has the disadvantage that you sometimes need to check both features: #![cfg_attr(all(feature="alloc", not(feature="std")), feature(alloc))].


[dependencies]
ucd-util = "0.1.0"
20 changes: 13 additions & 7 deletions regex-syntax/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@
Defines an abstract syntax for regular expressions.
*/

use std::cmp::Ordering;
use std::error;
use std::fmt;
use core::cmp::Ordering;
use core::fmt;
#[cfg(feature = "std")]
use core::error;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::{String, Vec};

pub use ast::visitor::{Visitor, visit};

Expand Down Expand Up @@ -179,6 +184,7 @@ pub enum ErrorKind {
__Nonexhaustive,
}

#[cfg(feature = "std")]
impl error::Error for Error {
fn description(&self) -> &str {
use self::ErrorKind::*;
Expand Down Expand Up @@ -229,7 +235,7 @@ impl fmt::Display for ErrorKind {
match *self {
CaptureLimitExceeded => {
write!(f, "exceeded the maximum number of \
capturing groups ({})", ::std::u32::MAX)
capturing groups ({})", ::core::u32::MAX)
}
ClassEscapeInvalid => {
write!(f, "invalid escape sequence found in character class")
Expand Down Expand Up @@ -1366,7 +1372,7 @@ pub enum Flag {
/// space but heap space proportional to the depth of the `Ast`.
impl Drop for Ast {
fn drop(&mut self) {
use std::mem;
use core::mem;

match *self {
Ast::Empty(_)
Expand Down Expand Up @@ -1416,7 +1422,7 @@ impl Drop for Ast {
/// stack space but heap space proportional to the depth of the `ClassSet`.
impl Drop for ClassSet {
fn drop(&mut self) {
use std::mem;
use core::mem;

match *self {
ClassSet::Item(ref item) => {
Expand Down Expand Up @@ -1488,7 +1494,7 @@ mod tests {
#[test]
#[cfg(any(unix, windows))]
fn no_stack_overflow_on_drop() {
use std::thread;
use std_test::thread;

let run = || {
let span = || Span::splat(Position::new(0, 0, 0));
Expand Down
45 changes: 29 additions & 16 deletions regex-syntax/src/ast/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,22 @@
This module provides a regular expression parser.
*/

use std::borrow::Borrow;
use std::cell::{Cell, RefCell};
use std::mem;
use std::result;
use core::cell::{Cell, RefCell};
use core::mem;
use core::result;
#[cfg(feature = "std")]
use core::prelude::v1::*;
#[cfg(feature = "std")]
use core::borrow::Borrow;

#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::borrow::Borrow;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::{String, ToString};
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;

use ast::{self, Ast, Position, Span};
use either::Either;
Expand Down Expand Up @@ -1530,8 +1542,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
///
/// Assuming the preconditions are met, this routine can never fail.
fn parse_octal(&self) -> ast::Literal {
use std::char;
use std::u32;
use core::char;
use core::u32;

assert!(self.parser().octal);
assert!('0' <= self.char() && self.char() <= '7');
Expand Down Expand Up @@ -1596,8 +1608,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
&self,
kind: ast::HexLiteralKind,
) -> Result<ast::Literal> {
use std::char;
use std::u32;
use core::char;
use core::u32;

let mut scratch = self.parser().scratch.borrow_mut();
scratch.clear();
Expand Down Expand Up @@ -1643,8 +1655,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
&self,
kind: ast::HexLiteralKind,
) -> Result<ast::Literal> {
use std::char;
use std::u32;
use core::char;
use core::u32;

let mut scratch = self.parser().scratch.borrow_mut();
scratch.clear();
Expand Down Expand Up @@ -2123,7 +2135,7 @@ impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
fn increment_depth(&mut self, span: &Span) -> Result<()> {
let new = self.depth.checked_add(1).ok_or_else(|| self.p.error(
span.clone(),
ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
ast::ErrorKind::NestLimitExceeded(::core::u32::MAX),
))?;
let limit = self.p.parser().nest_limit;
if new > limit {
Expand Down Expand Up @@ -2255,7 +2267,8 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {

#[cfg(test)]
mod tests {
use std::ops::Range;
use std_test::prelude::v1::*;
use core::ops::Range;

use ast::{self, Ast, Position, Span};
use super::{Parser, ParserI, ParserBuilder, Primitive};
Expand Down Expand Up @@ -3849,7 +3862,7 @@ bar
Ok(Primitive::Literal(ast::Literal {
span: span(0..pat.len()),
kind: ast::LiteralKind::Octal,
c: ::std::char::from_u32(i).unwrap(),
c: ::core::char::from_u32(i).unwrap(),
})));
}
assert_eq!(
Expand Down Expand Up @@ -3918,7 +3931,7 @@ bar
Ok(Primitive::Literal(ast::Literal {
span: span(0..pat.len()),
kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
c: ::std::char::from_u32(i).unwrap(),
c: ::core::char::from_u32(i).unwrap(),
})));
}

Expand All @@ -3945,7 +3958,7 @@ bar
#[test]
fn parse_hex_four() {
for i in 0..65536 {
let c = match ::std::char::from_u32(i) {
let c = match ::core::char::from_u32(i) {
None => continue,
Some(c) => c,
};
Expand Down Expand Up @@ -4001,7 +4014,7 @@ bar
#[test]
fn parse_hex_eight() {
for i in 0..65536 {
let c = match ::std::char::from_u32(i) {
let c = match ::core::char::from_u32(i) {
None => continue,
Some(c) => c,
};
Expand Down
4 changes: 4 additions & 0 deletions regex-syntax/src/ast/print.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
This module provides a regular expression printer for `Ast`.
*/

#[cfg(feature = "std")]
use std::fmt;
#[cfg(not(feature = "std"))]
use core::fmt;

use ast::{self, Ast};
use ast::visitor::{self, Visitor};
Expand Down Expand Up @@ -415,6 +418,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
#[cfg(test)]
mod tests {
use ast::parse::ParserBuilder;
use std_test::prelude::v1::*;
use super::Printer;

fn roundtrip(given: &str) {
Expand Down
5 changes: 4 additions & 1 deletion regex-syntax/src/ast/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::fmt;
use core::fmt;

#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;

use ast::{self, Ast};

Expand Down
20 changes: 16 additions & 4 deletions regex-syntax/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
use std::cmp;
use core::cmp;
use core::fmt;
use core::result;
#[cfg(feature = "std")]
use std::error;
use std::fmt;
use std::result;
#[cfg(feature = "std")]
use std::prelude::v1::*;

#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::slice::SliceConcatExt;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::{String, ToString};
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;

use ast;
use hir;
Expand Down Expand Up @@ -39,6 +49,7 @@ impl From<hir::Error> for Error {
}
}

#[cfg(feature = "std")]
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Expand Down Expand Up @@ -280,11 +291,12 @@ impl<'p> Spans<'p> {
}

fn repeat_char(c: char, count: usize) -> String {
::std::iter::repeat(c).take(count).collect()
::core::iter::repeat(c).take(count).collect()
}

#[cfg(test)]
mod tests {
use std_test::prelude::v1::*;
use ast::parse::Parser;

// See: https://github.com/rust-lang/regex/issues/464
Expand Down
12 changes: 7 additions & 5 deletions regex-syntax/src/hir/interval.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use std::char;
use std::cmp;
use std::fmt::Debug;
use std::slice;
use std::u8;
use core::char;
use core::cmp;
use core::fmt::Debug;
use core::slice;
use core::u8;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;

// This module contains an *internal* implementation of interval sets.
//
Expand Down
27 changes: 18 additions & 9 deletions regex-syntax/src/hir/literal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,19 @@
Provides routines for extracting literal prefixes and suffixes from an `Hir`.
*/

use std::cmp;
use std::fmt;
use std::iter;
use std::mem;
use std::ops;
use core::cmp;
use core::fmt;
use core::iter;
use core::mem;
use core::ops;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::borrow::ToOwned;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::{String, ToString};
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;

use hir::{self, Hir, HirKind};

Expand Down Expand Up @@ -483,7 +491,7 @@ impl Literals {
cls: &hir::ClassUnicode,
reverse: bool,
) -> bool {
use std::char;
use core::char;

if self.class_exceeds_limits(cls_char_count(cls)) {
return false;
Expand Down Expand Up @@ -946,7 +954,7 @@ fn position(needle: &[u8], mut haystack: &[u8]) -> Option<usize> {
}

fn escape_unicode(bytes: &[u8]) -> String {
let show = match ::std::str::from_utf8(bytes) {
let show = match ::core::str::from_utf8(bytes) {
Ok(v) => v.to_string(),
Err(_) => escape_bytes(bytes),
};
Expand Down Expand Up @@ -979,7 +987,7 @@ fn escape_bytes(bytes: &[u8]) -> String {
}

fn escape_byte(byte: u8) -> String {
use std::ascii::escape_default;
use core::ascii::escape_default;

let escaped: Vec<u8> = escape_default(byte).collect();
String::from_utf8_lossy(&escaped).into_owned()
Expand All @@ -999,7 +1007,8 @@ fn cls_byte_count(cls: &hir::ClassBytes) -> usize {

#[cfg(test)]
mod tests {
use std::fmt;
use std_test::prelude::v1::*;
use core::fmt;

use ParserBuilder;
use hir::Hir;
Expand Down
Loading