diff --git a/Cargo.lock b/Cargo.lock index 3638f2629..c0ba1f49b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -127,6 +127,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "icu_collections" version = "1.5.0" @@ -424,6 +430,7 @@ dependencies = [ "ahash", "base64", "enum_dispatch", + "hex", "idna 1.0.1", "jiter", "num-bigint", diff --git a/Cargo.toml b/Cargo.toml index 6832fc4e8..04d22e02b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ num-bigint = "0.4.6" python3-dll-a = "0.2.10" uuid = "1.9.1" jiter = { version = "0.5", features = ["python"] } +hex = "0.4.3" [lib] name = "_pydantic_core" diff --git a/python/pydantic_core/_pydantic_core.pyi b/python/pydantic_core/_pydantic_core.pyi index 05fe48e40..34c70cd31 100644 --- a/python/pydantic_core/_pydantic_core.pyi +++ b/python/pydantic_core/_pydantic_core.pyi @@ -352,7 +352,7 @@ def to_json( exclude_none: bool = False, round_trip: bool = False, timedelta_mode: Literal['iso8601', 'float'] = 'iso8601', - bytes_mode: Literal['utf8', 'base64'] = 'utf8', + bytes_mode: Literal['utf8', 'base64', 'hex'] = 'utf8', inf_nan_mode: Literal['null', 'constants', 'strings'] = 'constants', serialize_unknown: bool = False, fallback: Callable[[Any], Any] | None = None, @@ -373,7 +373,7 @@ def to_json( exclude_none: Whether to exclude fields that have a value of `None`. round_trip: Whether to enable serialization and validation round-trip support. timedelta_mode: How to serialize `timedelta` objects, either `'iso8601'` or `'float'`. - bytes_mode: How to serialize `bytes` objects, either `'utf8'` or `'base64'`. + bytes_mode: How to serialize `bytes` objects, either `'utf8'`, `'base64'`, or `'hex'`. inf_nan_mode: How to serialize `Infinity`, `-Infinity` and `NaN` values, either `'null'`, `'constants'`, or `'strings'`. serialize_unknown: Attempt to serialize unknown types, `str(value)` will be used, if that fails `""` will be used. @@ -427,7 +427,7 @@ def to_jsonable_python( exclude_none: bool = False, round_trip: bool = False, timedelta_mode: Literal['iso8601', 'float'] = 'iso8601', - bytes_mode: Literal['utf8', 'base64'] = 'utf8', + bytes_mode: Literal['utf8', 'base64', 'hex'] = 'utf8', inf_nan_mode: Literal['null', 'constants', 'strings'] = 'constants', serialize_unknown: bool = False, fallback: Callable[[Any], Any] | None = None, @@ -448,7 +448,7 @@ def to_jsonable_python( exclude_none: Whether to exclude fields that have a value of `None`. round_trip: Whether to enable serialization and validation round-trip support. timedelta_mode: How to serialize `timedelta` objects, either `'iso8601'` or `'float'`. - bytes_mode: How to serialize `bytes` objects, either `'utf8'` or `'base64'`. + bytes_mode: How to serialize `bytes` objects, either `'utf8'`, `'base64'`, or `'hex'`. inf_nan_mode: How to serialize `Infinity`, `-Infinity` and `NaN` values, either `'null'`, `'constants'`, or `'strings'`. serialize_unknown: Attempt to serialize unknown types, `str(value)` will be used, if that fails `""` will be used. diff --git a/python/pydantic_core/core_schema.py b/python/pydantic_core/core_schema.py index 5405d96b2..c157a027b 100644 --- a/python/pydantic_core/core_schema.py +++ b/python/pydantic_core/core_schema.py @@ -70,6 +70,7 @@ class CoreConfig(TypedDict, total=False): ser_json_bytes: The serialization option for `bytes` values. Default is 'utf8'. ser_json_inf_nan: The serialization option for infinity and NaN values in float fields. Default is 'null'. + val_json_bytes: The validation option for `bytes` values, complementing ser_json_bytes. Default is 'utf8'. hide_input_in_errors: Whether to hide input data from `ValidationError` representation. validation_error_cause: Whether to add user-python excs to the __cause__ of a ValidationError. Requires exceptiongroup backport pre Python 3.11. @@ -107,6 +108,7 @@ class CoreConfig(TypedDict, total=False): ser_json_timedelta: Literal['iso8601', 'float'] # default: 'iso8601' ser_json_bytes: Literal['utf8', 'base64', 'hex'] # default: 'utf8' ser_json_inf_nan: Literal['null', 'constants', 'strings'] # default: 'null' + val_json_bytes: Literal['utf8', 'base64', 'hex'] # default: 'utf8' # used to hide input data from ValidationError repr hide_input_in_errors: bool validation_error_cause: bool # default: False @@ -3904,6 +3906,7 @@ def definition_reference_schema( 'bytes_type', 'bytes_too_short', 'bytes_too_long', + 'bytes_invalid_encoding', 'value_error', 'assertion_error', 'literal_error', diff --git a/src/errors/types.rs b/src/errors/types.rs index 09ad47f38..8807ba129 100644 --- a/src/errors/types.rs +++ b/src/errors/types.rs @@ -290,6 +290,10 @@ error_types! { BytesTooLong { max_length: {ctx_type: usize, ctx_fn: field_from_context}, }, + BytesInvalidEncoding { + encoding: {ctx_type: String, ctx_fn: field_from_context}, + encoding_error: {ctx_type: String, ctx_fn: field_from_context}, + }, // --------------------- // python errors from functions ValueError { @@ -515,6 +519,7 @@ impl ErrorType { Self::BytesType {..} => "Input should be a valid bytes", Self::BytesTooShort {..} => "Data should have at least {min_length} byte{expected_plural}", Self::BytesTooLong {..} => "Data should have at most {max_length} byte{expected_plural}", + Self::BytesInvalidEncoding { .. } => "Data should be valid {encoding}: {encoding_error}", Self::ValueError {..} => "Value error, {error}", Self::AssertionError {..} => "Assertion failed, {error}", Self::CustomError {..} => "", // custom errors are handled separately @@ -664,6 +669,11 @@ impl ErrorType { let expected_plural = plural_s(*max_length); to_string_render!(tmpl, max_length, expected_plural) } + Self::BytesInvalidEncoding { + encoding, + encoding_error, + .. + } => render!(tmpl, encoding, encoding_error), Self::ValueError { error, .. } => { let error = &error .as_ref() diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index 5e915c548..b0e058d9b 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -7,6 +7,7 @@ use pyo3::{intern, prelude::*}; use crate::errors::{ErrorTypeDefaults, InputValue, LocItem, ValError, ValResult}; use crate::lookup_key::{LookupKey, LookupPath}; use crate::tools::py_err; +use crate::validators::ValBytesMode; use super::datetime::{EitherDate, EitherDateTime, EitherTime, EitherTimedelta}; use super::return_enums::{EitherBytes, EitherInt, EitherString}; @@ -71,7 +72,7 @@ pub trait Input<'py>: fmt::Debug + ToPyObject { fn validate_str(&self, strict: bool, coerce_numbers_to_str: bool) -> ValMatch>; - fn validate_bytes<'a>(&'a self, strict: bool) -> ValMatch>; + fn validate_bytes<'a>(&'a self, strict: bool, mode: ValBytesMode) -> ValMatch>; fn validate_bool(&self, strict: bool) -> ValMatch; diff --git a/src/input/input_json.rs b/src/input/input_json.rs index f2bf74998..3adc36ba6 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -10,6 +10,7 @@ use strum::EnumMessage; use crate::errors::{ErrorType, ErrorTypeDefaults, InputValue, LocItem, ValError, ValResult}; use crate::lookup_key::{LookupKey, LookupPath}; use crate::validators::decimal::create_decimal; +use crate::validators::ValBytesMode; use super::datetime::{ bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, float_as_datetime, float_as_duration, @@ -106,9 +107,16 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { } } - fn validate_bytes<'a>(&'a self, _strict: bool) -> ValResult>> { + fn validate_bytes<'a>( + &'a self, + _strict: bool, + mode: ValBytesMode, + ) -> ValResult>> { match self { - JsonValue::Str(s) => Ok(ValidationMatch::strict(s.as_bytes().into())), + JsonValue::Str(s) => match mode.deserialize_string(s) { + Ok(b) => Ok(ValidationMatch::strict(b)), + Err(e) => Err(ValError::new(e, self)), + }, _ => Err(ValError::new(ErrorTypeDefaults::BytesType, self)), } } @@ -342,8 +350,15 @@ impl<'py> Input<'py> for str { Ok(ValidationMatch::strict(self.into())) } - fn validate_bytes<'a>(&'a self, _strict: bool) -> ValResult>> { - Ok(ValidationMatch::strict(self.as_bytes().into())) + fn validate_bytes<'a>( + &'a self, + _strict: bool, + mode: ValBytesMode, + ) -> ValResult>> { + match mode.deserialize_string(self) { + Ok(b) => Ok(ValidationMatch::strict(b)), + Err(e) => Err(ValError::new(e, self)), + } } fn validate_bool(&self, _strict: bool) -> ValResult> { diff --git a/src/input/input_python.rs b/src/input/input_python.rs index 3de712272..519a96f89 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -17,6 +17,7 @@ use crate::errors::{ErrorType, ErrorTypeDefaults, InputValue, LocItem, ValError, use crate::tools::{extract_i64, safe_repr}; use crate::validators::decimal::{create_decimal, get_decimal_type}; use crate::validators::Exactness; +use crate::validators::ValBytesMode; use crate::ArgsKwargs; use super::datetime::{ @@ -174,7 +175,11 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { Err(ValError::new(ErrorTypeDefaults::StringType, self)) } - fn validate_bytes<'a>(&'a self, strict: bool) -> ValResult>> { + fn validate_bytes<'a>( + &'a self, + strict: bool, + mode: ValBytesMode, + ) -> ValResult>> { if let Ok(py_bytes) = self.downcast_exact::() { return Ok(ValidationMatch::exact(py_bytes.into())); } else if let Ok(py_bytes) = self.downcast::() { @@ -185,7 +190,10 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { if !strict { return if let Ok(py_str) = self.downcast::() { let str = py_string_str(py_str)?; - Ok(str.as_bytes().into()) + match mode.deserialize_string(str) { + Ok(b) => Ok(b), + Err(e) => Err(ValError::new(e, self)), + } } else if let Ok(py_byte_array) = self.downcast::() { Ok(py_byte_array.to_vec().into()) } else { diff --git a/src/input/input_string.rs b/src/input/input_string.rs index 3c61cdebc..3ef1b58ce 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -8,6 +8,7 @@ use crate::input::py_string_str; use crate::lookup_key::{LookupKey, LookupPath}; use crate::tools::safe_repr; use crate::validators::decimal::create_decimal; +use crate::validators::ValBytesMode; use super::datetime::{ bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, EitherDate, EitherDateTime, EitherTime, @@ -105,9 +106,16 @@ impl<'py> Input<'py> for StringMapping<'py> { } } - fn validate_bytes<'a>(&'a self, _strict: bool) -> ValResult>> { + fn validate_bytes<'a>( + &'a self, + _strict: bool, + mode: ValBytesMode, + ) -> ValResult>> { match self { - Self::String(s) => py_string_str(s).map(|b| ValidationMatch::strict(b.as_bytes().into())), + Self::String(s) => py_string_str(s).and_then(|b| match mode.deserialize_string(b) { + Ok(b) => Ok(ValidationMatch::strict(b)), + Err(e) => Err(ValError::new(e, self)), + }), Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::BytesType, self)), } } diff --git a/src/lib.rs b/src/lib.rs index d55e83645..e94abf679 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,8 @@ use std::sync::OnceLock; use jiter::{map_json_error, PartialMode, PythonParse, StringCacheMode}; use pyo3::exceptions::PyTypeError; use pyo3::{prelude::*, sync::GILOnceCell}; +use serializers::BytesMode; +use validators::ValBytesMode; // parse this first to get access to the contained macro #[macro_use] @@ -55,7 +57,7 @@ pub fn from_json<'py>( allow_partial: bool, ) -> PyResult> { let v_match = data - .validate_bytes(false) + .validate_bytes(false, ValBytesMode { ser: BytesMode::Utf8 }) .map_err(|_| PyTypeError::new_err("Expected bytes, bytearray or str"))?; let json_either_bytes = v_match.into_inner(); let json_bytes = json_either_bytes.as_slice(); diff --git a/src/serializers/config.rs b/src/serializers/config.rs index 5421ed920..359d35276 100644 --- a/src/serializers/config.rs +++ b/src/serializers/config.rs @@ -52,7 +52,7 @@ pub trait FromConfig { macro_rules! serialization_mode { ($name:ident, $config_key:expr, $($variant:ident => $value:expr),* $(,)?) => { #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] - pub(crate) enum $name { + pub enum $name { #[default] $($variant,)* } @@ -183,9 +183,7 @@ impl BytesMode { Err(e) => Err(Error::custom(e.to_string())), }, Self::Base64 => serializer.serialize_str(&base64::engine::general_purpose::URL_SAFE.encode(bytes)), - Self::Hex => { - serializer.serialize_str(&bytes.iter().fold(String::new(), |acc, b| acc + &format!("{b:02x}"))) - } + Self::Hex => serializer.serialize_str(hex::encode(bytes).as_str()), } } } diff --git a/src/serializers/mod.rs b/src/serializers/mod.rs index 66b345530..1a0405e2c 100644 --- a/src/serializers/mod.rs +++ b/src/serializers/mod.rs @@ -8,6 +8,7 @@ use pyo3::{PyTraverseError, PyVisit}; use crate::definitions::{Definitions, DefinitionsBuilder}; use crate::py_gc::PyGcTraverse; +pub(crate) use config::BytesMode; use config::SerializationConfig; pub use errors::{PydanticSerializationError, PydanticSerializationUnexpectedValue}; use extra::{CollectWarnings, SerRecursionState, WarningsMode}; diff --git a/src/validators/bytes.rs b/src/validators/bytes.rs index eb9d9441d..c51d77171 100644 --- a/src/validators/bytes.rs +++ b/src/validators/bytes.rs @@ -8,11 +8,13 @@ use crate::input::Input; use crate::tools::SchemaDict; +use super::config::ValBytesMode; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; #[derive(Debug, Clone)] pub struct BytesValidator { strict: bool, + bytes_mode: ValBytesMode, } impl BuildValidator for BytesValidator { @@ -31,6 +33,7 @@ impl BuildValidator for BytesValidator { } else { Ok(Self { strict: is_strict(schema, config)?, + bytes_mode: ValBytesMode::from_config(config)?, } .into()) } @@ -47,7 +50,7 @@ impl Validator for BytesValidator { state: &mut ValidationState<'_, 'py>, ) -> ValResult { input - .validate_bytes(state.strict_or(self.strict)) + .validate_bytes(state.strict_or(self.strict), self.bytes_mode) .map(|m| m.unpack(state).into_py(py)) } @@ -59,6 +62,7 @@ impl Validator for BytesValidator { #[derive(Debug, Clone)] pub struct BytesConstrainedValidator { strict: bool, + bytes_mode: ValBytesMode, max_length: Option, min_length: Option, } @@ -72,7 +76,9 @@ impl Validator for BytesConstrainedValidator { input: &(impl Input<'py> + ?Sized), state: &mut ValidationState<'_, 'py>, ) -> ValResult { - let either_bytes = input.validate_bytes(state.strict_or(self.strict))?.unpack(state); + let either_bytes = input + .validate_bytes(state.strict_or(self.strict), self.bytes_mode)? + .unpack(state); let len = either_bytes.len()?; if let Some(min_length) = self.min_length { @@ -110,6 +116,7 @@ impl BytesConstrainedValidator { let py = schema.py(); Ok(Self { strict: is_strict(schema, config)?, + bytes_mode: ValBytesMode::from_config(config)?, min_length: schema.get_as(intern!(py, "min_length"))?, max_length: schema.get_as(intern!(py, "max_length"))?, } diff --git a/src/validators/config.rs b/src/validators/config.rs new file mode 100644 index 000000000..8ffcfd0f5 --- /dev/null +++ b/src/validators/config.rs @@ -0,0 +1,49 @@ +use std::borrow::Cow; +use std::str::FromStr; + +use base64::Engine; +use pyo3::types::{PyDict, PyString}; +use pyo3::{intern, prelude::*}; + +use crate::errors::ErrorType; +use crate::input::EitherBytes; +use crate::serializers::BytesMode; +use crate::tools::SchemaDict; + +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub struct ValBytesMode { + pub ser: BytesMode, +} + +impl ValBytesMode { + pub fn from_config(config: Option<&Bound<'_, PyDict>>) -> PyResult { + let Some(config_dict) = config else { + return Ok(Self::default()); + }; + let raw_mode = config_dict.get_as::>(intern!(config_dict.py(), "val_json_bytes"))?; + let ser_mode = raw_mode.map_or_else(|| Ok(BytesMode::default()), |raw| BytesMode::from_str(&raw.to_cow()?))?; + Ok(Self { ser: ser_mode }) + } + + pub fn deserialize_string<'py>(self, s: &str) -> Result, ErrorType> { + match self.ser { + BytesMode::Utf8 => Ok(EitherBytes::Cow(Cow::Borrowed(s.as_bytes()))), + BytesMode::Base64 => match base64::engine::general_purpose::URL_SAFE.decode(s) { + Ok(bytes) => Ok(EitherBytes::from(bytes)), + Err(err) => Err(ErrorType::BytesInvalidEncoding { + encoding: "base64".to_string(), + encoding_error: err.to_string(), + context: None, + }), + }, + BytesMode::Hex => match hex::decode(s) { + Ok(vec) => Ok(EitherBytes::from(vec)), + Err(err) => Err(ErrorType::BytesInvalidEncoding { + encoding: "hex".to_string(), + encoding_error: err.to_string(), + context: None, + }), + }, + } + } +} diff --git a/src/validators/json.rs b/src/validators/json.rs index 3e35a640b..f86cc4624 100644 --- a/src/validators/json.rs +++ b/src/validators/json.rs @@ -6,8 +6,10 @@ use jiter::{JsonValue, PartialMode, PythonParse}; use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValLineError, ValResult}; use crate::input::{EitherBytes, Input, InputType, ValidationMatch}; +use crate::serializers::BytesMode; use crate::tools::SchemaDict; +use super::config::ValBytesMode; use super::{build_validator, BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; #[derive(Debug)] @@ -87,7 +89,7 @@ impl Validator for JsonValidator { pub fn validate_json_bytes<'a, 'py>( input: &'a (impl Input<'py> + ?Sized), ) -> ValResult>> { - match input.validate_bytes(false) { + match input.validate_bytes(false, ValBytesMode { ser: BytesMode::Utf8 }) { Ok(v_match) => Ok(v_match), Err(ValError::LineErrors(e)) => Err(ValError::LineErrors( e.into_iter().map(map_bytes_error).collect::>(), diff --git a/src/validators/mod.rs b/src/validators/mod.rs index 02abda1fd..18c947313 100644 --- a/src/validators/mod.rs +++ b/src/validators/mod.rs @@ -16,6 +16,7 @@ use crate::input::{Input, InputType, StringMapping}; use crate::py_gc::PyGcTraverse; use crate::recursion_guard::RecursionState; use crate::tools::SchemaDict; +pub(crate) use config::ValBytesMode; mod any; mod arguments; @@ -24,6 +25,7 @@ mod bytes; mod call; mod callable; mod chain; +mod config; mod custom_error; mod dataclass; mod date; diff --git a/src/validators/uuid.rs b/src/validators/uuid.rs index 44aa22003..e19dfcfd0 100644 --- a/src/validators/uuid.rs +++ b/src/validators/uuid.rs @@ -13,8 +13,10 @@ use crate::input::input_as_python_instance; use crate::input::Input; use crate::input::InputType; use crate::input::ValidationMatch; +use crate::serializers::BytesMode; use crate::tools::SchemaDict; +use super::config::ValBytesMode; use super::model::create_class; use super::model::force_setattr; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, Exactness, ValidationState, Validator}; @@ -172,7 +174,7 @@ impl UuidValidator { } None => { let either_bytes = input - .validate_bytes(true) + .validate_bytes(true, ValBytesMode { ser: BytesMode::Utf8 }) .map_err(|_| ValError::new(ErrorTypeDefaults::UuidType, input))? .into_inner(); let bytes_slice = either_bytes.as_slice(); diff --git a/tests/test_errors.py b/tests/test_errors.py index daabf04c6..bd6f6214e 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -320,6 +320,16 @@ def f(input_value, info): ('bytes_too_short', 'Data should have at least 1 byte', {'min_length': 1}), ('bytes_too_long', 'Data should have at most 42 bytes', {'max_length': 42}), ('bytes_too_long', 'Data should have at most 1 byte', {'max_length': 1}), + ( + 'bytes_invalid_encoding', + 'Data should be valid base64: Invalid byte 1, offset 1', + {'encoding': 'base64', 'encoding_error': 'Invalid byte 1, offset 1'}, + ), + ( + 'bytes_invalid_encoding', + 'Data should be valid hex: Odd number of digits', + {'encoding': 'hex', 'encoding_error': 'Odd number of digits'}, + ), ('value_error', 'Value error, foobar', {'error': ValueError('foobar')}), ('assertion_error', 'Assertion failed, foobar', {'error': AssertionError('foobar')}), ('literal_error', 'Input should be foo', {'expected': 'foo'}), diff --git a/tests/test_json.py b/tests/test_json.py index 0f599cee1..2a5176a27 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -376,3 +376,78 @@ def test_partial_parse(): with pytest.raises(ValueError, match='EOF while parsing a string at line 1 column 15'): from_json(b'["aa", "bb", "c') assert from_json(b'["aa", "bb", "c', allow_partial=True) == ['aa', 'bb'] + + +def test_json_bytes_base64_round_trip(): + data = b'hello' + encoded = b'"aGVsbG8="' + assert to_json(data, bytes_mode='base64') == encoded + + v = SchemaValidator({'type': 'bytes'}, {'val_json_bytes': 'base64'}) + assert v.validate_json(encoded) == data + + assert to_json({'key': data}, bytes_mode='base64') == b'{"key":"aGVsbG8="}' + v = SchemaValidator( + {'type': 'dict', 'keys_schema': {'type': 'str'}, 'values_schema': {'type': 'bytes'}}, + {'val_json_bytes': 'base64'}, + ) + assert v.validate_json('{"key":"aGVsbG8="}') == {'key': data} + + +def test_json_bytes_base64_invalid(): + v = SchemaValidator({'type': 'bytes'}, {'val_json_bytes': 'base64'}) + wrong_input = 'wrong!' + with pytest.raises(ValidationError) as exc_info: + v.validate_json(json.dumps(wrong_input)) + assert exc_info.value.errors(include_url=False, include_context=False) == [ + { + 'type': 'bytes_invalid_encoding', + 'loc': (), + 'msg': f'Data should be valid base64: Invalid byte {ord("!")}, offset {len(wrong_input)-1}.', + 'input': wrong_input, + } + ] + + +def test_json_bytes_hex_round_trip(): + data = b'hello' + encoded = b'"68656c6c6f"' + assert to_json(data, bytes_mode='hex') == encoded + + v = SchemaValidator({'type': 'bytes'}, {'val_json_bytes': 'hex'}) + assert v.validate_json(encoded) == data + + assert to_json({'key': data}, bytes_mode='hex') == b'{"key":"68656c6c6f"}' + v = SchemaValidator( + {'type': 'dict', 'keys_schema': {'type': 'str'}, 'values_schema': {'type': 'bytes'}}, + {'val_json_bytes': 'hex'}, + ) + assert v.validate_json('{"key":"68656c6c6f"}') == {'key': data} + + +def test_json_bytes_hex_invalid(): + v = SchemaValidator({'type': 'bytes'}, {'val_json_bytes': 'hex'}) + + wrong_input = 'a' + with pytest.raises(ValidationError) as exc_info: + v.validate_json(json.dumps(wrong_input)) + assert exc_info.value.errors(include_url=False, include_context=False) == [ + { + 'type': 'bytes_invalid_encoding', + 'loc': (), + 'msg': 'Data should be valid hex: Odd number of digits', + 'input': wrong_input, + } + ] + + wrong_input = 'ag' + with pytest.raises(ValidationError) as exc_info: + v.validate_json(json.dumps(wrong_input)) + assert exc_info.value.errors(include_url=False, include_context=False) == [ + { + 'type': 'bytes_invalid_encoding', + 'loc': (), + 'msg': "Data should be valid hex: Invalid character 'g' at position 1", + 'input': wrong_input, + } + ]