From 94a6f9a5d0d54853afef634993650d27f91e0ce4 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 5 Sep 2024 13:16:17 +0200 Subject: [PATCH] Update PyO3 0.20 -> 0.22 & Python 3.13 (#169) --- .github/workflows/CI.yml | 9 +- Cargo.toml | 6 +- src/blosc2.rs | 1227 +++++++++++++++++++------------------- src/brotli.rs | 171 +++--- src/bzip2.rs | 164 ++--- src/deflate.rs | 164 ++--- src/experimental.rs | 19 +- src/gzip.rs | 168 +++--- src/io.rs | 24 +- src/lib.rs | 70 ++- src/lz4.rs | 464 +++++++------- src/snappy.rs | 285 +++++---- src/xz.rs | 661 ++++++++++---------- src/zstd.rs | 167 +++--- 14 files changed, 1842 insertions(+), 1757 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3fae5415..d3af49c1 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -23,11 +23,12 @@ jobs: fail-fast: ${{ !( startsWith(github.ref, 'refs/heads/master') || startsWith(github.ref, 'refs/tags/') ) }} matrix: python-version: - - '3.8' - - '3.9' + - '3.8' + - '3.9' - '3.10' - '3.11' - '3.12' + - '3.13' conf: - { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-gnu, manylinux: auto } - { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-musl, manylinux: musllinux_1_1 } @@ -116,6 +117,7 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: ${{ matrix.conf.python-architecture }} + allow-prereleases: true - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable @@ -214,7 +216,8 @@ jobs: # Could use 'distro: alpine_latest' in 'run-on-arch-action' but seems difficult to install a specific version of python # so we'll just use existing python alpine images to test import and cli use w/o testing archs other than x86_64 - name: Install built wheel and Test (musllinux) - if: startsWith(matrix.conf.manylinux, 'musl') && matrix.conf.target == 'x86_64' + # TODO: python:3.13-alpine doesn't exist yet + if: startsWith(matrix.conf.manylinux, 'musl') && matrix.conf.target == 'x86_64' && matrix.python-version != '3.13' run: | docker run \ -v $(pwd)/dist:/wheels \ diff --git a/Cargo.toml b/Cargo.toml index 80a7df01..9676bb4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam-python" -version = "2.8.4-rc2" +version = "2.8.4-rc3" authors = ["Miles Granger "] edition = "2021" license = "MIT" @@ -20,11 +20,11 @@ use-system-blosc2 = ["libcramjam/use-system-blosc2"] [dependencies] -pyo3 = { version = "^0.20", default-features = false, features = ["macros"] } +pyo3 = { version = "^0.22", default-features = false, features = ["macros"] } libcramjam = { version = "0.4.2" } [build-dependencies] -pyo3-build-config = "^0.20" +pyo3-build-config = "^0.22" [profile.release] strip = true diff --git a/src/blosc2.rs b/src/blosc2.rs index 76034a7d..120265b2 100644 --- a/src/blosc2.rs +++ b/src/blosc2.rs @@ -1,258 +1,79 @@ -//! snappy de/compression interface -use std::io::{self, BufReader, Cursor}; - -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::{AsBytes, RustyBuffer}; -use crate::BytesType; -use libcramjam::blosc2::blosc2::schunk::{Chunk, SChunk, Storage}; -use libcramjam::blosc2::blosc2::{CLevel, CParams, Codec, DParams, Filter}; -use pyo3::exceptions::{self, PyRuntimeError}; +//! 
blosc2 de/compression interface use pyo3::prelude::*; -use pyo3::types::PySlice; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - libcramjam::blosc2::blosc2::init(); - - let ncores = std::thread::available_parallelism().map(|v| v.get()).unwrap_or(1); - libcramjam::blosc2::blosc2::set_nthreads(ncores); - - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - - m.add_function(wrap_pyfunction!(compress_chunk, m)?)?; - m.add_function(wrap_pyfunction!(decompress_chunk, m)?)?; - - m.add_function(wrap_pyfunction!(compress_chunk_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_chunk_into, m)?)?; - - // extra functions helpful when using blosc2 - m.add_function(wrap_pyfunction!(get_version, m)?)?; - m.add_function(wrap_pyfunction!(get_nthreads, m)?)?; - m.add_function(wrap_pyfunction!(set_nthreads, m)?)?; - m.add_function(wrap_pyfunction!(max_compressed_len, m)?)?; - - m.add_class::()?; - m.add_class::()?; - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - Ok(()) -} - -/// Compress into SChunk -#[pyfunction] -#[allow(unused_variables)] - -pub fn compress( - py: Python, - input: BytesType, - output_len: Option, - typesize: Option, - clevel: Option, - filter: Option, - codec: Option, - nthreads: Option, -) -> PyResult { - if input.is_empty() { - return Ok(RustyBuffer::from(vec![])); - } - - let mut cparams = CParams::from_typesize(typesize.unwrap_or_else(|| input.itemsize())) - .set_codec(codec.map_or_else(Codec::default, Into::into)) - .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) - .set_filter(filter.map_or_else(Filter::default, Into::into)) - .set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); - let mut dparams = DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); - - let storage = Storage::default() - .set_contiguous(true) - .set_cparams(&mut cparams) - .set_dparams(&mut dparams); - - let mut schunk = SChunk::new(storage); - io::copy(&mut BufReader::new(input), &mut schunk)?; - schunk - .into_vec() - .map(RustyBuffer::from) - .map_err(CompressionError::from_err) -} - -/// Compress into output -#[pyfunction] -pub fn compress_into( - _py: Python, - input: BytesType, - mut output: BytesType, - typesize: Option, - clevel: Option, - filter: Option, - codec: Option, - nthreads: Option, -) -> PyResult { - if input.is_empty() { - return Ok(0); - } - - let mut cparams = CParams::from_typesize(typesize.unwrap_or_else(|| input.itemsize())) - .set_codec(codec.map_or_else(Codec::default, Into::into)) - .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) - .set_filter(filter.map_or_else(Filter::default, Into::into)) - .set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); - let mut dparams = DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); - - let storage = Storage::default() - .set_contiguous(true) - .set_cparams(&mut cparams) - .set_dparams(&mut dparams); - - if let BytesType::RustyFile(_file) = output { - return Err(pyo3::exceptions::PyNotImplementedError::new_err( - "Output to File w/ blosc2 not implemented yet.", - )); - // storage = storage - // .set_urlpath(&file.borrow().path) - // .map_err(CompressionError::from_err)?; - } - - let mut 
schunk = SChunk::new(storage); - io::copy(&mut BufReader::new(input), &mut schunk)?; - - let nbytes = schunk.frame().map_err(CompressionError::from_err)?.len(); - match output { - BytesType::RustyFile(_) => Ok(nbytes), - _ => { - let schunk_buf = schunk.into_vec().map_err(CompressionError::from_err)?; - io::copy(&mut Cursor::new(schunk_buf), &mut output)?; - Ok(nbytes) +/// blosc2 de/compression interface +#[pymodule] +pub mod blosc2 { + + use std::io::{self, BufReader, Cursor}; + + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use libcramjam::blosc2::blosc2::schunk::{Chunk, SChunk, Storage}; + use libcramjam::blosc2::blosc2::{CLevel, CParams, Codec, DParams, Filter}; + use pyo3::exceptions::{self, PyRuntimeError}; + use pyo3::prelude::*; + use pyo3::types::PySlice; + use pyo3::PyResult; + + /// Compress into SChunk + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (input, output_len=None, typesize=None, clevel=None, filter=None, codec=None, nthreads=None))] + pub fn compress( + py: Python, + input: BytesType, + output_len: Option, + typesize: Option, + clevel: Option, + filter: Option, + codec: Option, + nthreads: Option, + ) -> PyResult { + if input.is_empty() { + return Ok(RustyBuffer::from(vec![])); } - } -} - -/// Decompress a SChunk into buffer -#[pyfunction] -#[allow(unused_variables)] -pub fn decompress(py: Python, input: BytesType, output_len: Option) -> PyResult { - if input.is_empty() { - return Ok(RustyBuffer::from(vec![])); - } - return crate::generic!(py, libcramjam::blosc2::decompress[input], output_len = output_len) - .map_err(DecompressionError::from_err); -} - -/// decompress into output -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - if input.is_empty() { - return Ok(0); - } - crate::generic!(py, libcramjam::blosc2::decompress[input, output]).map_err(DecompressionError::from_err) -} - -/// Blosc2 decompression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.blosc2.decompress(compressed_bytes, output_len=Optional[None]) -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn decompress_chunk(py: Python, data: BytesType, output_len: Option) -> PyResult { - let bytes = data.as_bytes(); - let buf = py - .allow_threads(|| libcramjam::blosc2::decompress_chunk(bytes)) - .map(RustyBuffer::from)?; - Ok(buf) -} - -/// Decompress a Chunk into output -#[pyfunction] -pub fn decompress_chunk_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - let bytes = input.as_bytes(); - let out = output.as_bytes_mut()?; - let nbytes = py.allow_threads(|| libcramjam::blosc2::decompress_chunk_into(bytes, out))?; - Ok(nbytes) -} -/// Blosc2 compression, chunk format -/// -/// Python Example -/// -------------- -/// ```python -/// >>> _ = cramjam.blosc2.compress(b'some bytes here', typesize=1, clevel=CLevel.Nine, filter=Filter.Shuffle, codec=Codec.BloscLz) -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn compress_chunk( - py: Python, - data: BytesType, - typesize: Option, - clevel: Option, - filter: Option, - codec: Option, -) -> PyResult { - let bytes = data.as_bytes(); - py.allow_threads(|| { - let clevel = clevel.map(Into::into); - let filter = filter.map(Into::into); - let codec = codec.map(Into::into); - libcramjam::blosc2::blosc2::compress(bytes, typesize, clevel, filter, codec) - }) - .map_err(CompressionError::from_err) - .map(RustyBuffer::from) -} - -/// Compress a Chunk into output -#[pyfunction] -pub fn compress_chunk_into( - py: Python, - input: BytesType, - mut output: BytesType, - typesize: Option, - clevel: Option, - filter: Option, - codec: Option, -) -> PyResult { - let bytes = input.as_bytes(); - let out = output.as_bytes_mut()?; - py.allow_threads(|| { - let clevel = clevel.map(Into::into); - let filter = filter.map(Into::into); - let codec = codec.map(Into::into); - libcramjam::blosc2::blosc2::compress_into(bytes, out, typesize, clevel, filter, codec) - }) - .map_err(CompressionError::from_err) -} + let mut cparams = CParams::from_typesize(typesize.unwrap_or_else(|| input.itemsize())) + .set_codec(codec.map_or_else(Codec::default, Into::into)) + .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) + .set_filter(filter.map_or_else(Filter::default, Into::into)) + .set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); + let mut dparams = + DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); -/// A Compressor interface, using blosc2's SChunk -#[pyclass] -#[derive(Clone)] -pub struct Compressor(Option); + let storage = Storage::default() + .set_contiguous(true) + .set_cparams(&mut cparams) + .set_dparams(&mut dparams); -unsafe impl Send for Compressor {} + let mut schunk = SChunk::new(storage); + io::copy(&mut BufReader::new(input), &mut schunk)?; + schunk + .into_vec() + .map(RustyBuffer::from) + .map_err(CompressionError::from_err) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. 
- #[new] - pub fn __init__( - path: Option, + /// Compress into output + #[pyfunction] + #[pyo3(signature = (input, output, typesize=None, clevel=None, filter=None, codec=None, nthreads=None))] + pub fn compress_into( + _py: Python, + input: BytesType, + mut output: BytesType, typesize: Option, clevel: Option, filter: Option, codec: Option, nthreads: Option, - ) -> PyResult { - let mut cparams = CParams::from_typesize(typesize.unwrap_or(1)) + ) -> PyResult { + if input.is_empty() { + return Ok(0); + } + + let mut cparams = CParams::from_typesize(typesize.unwrap_or_else(|| input.itemsize())) .set_codec(codec.map_or_else(Codec::default, Into::into)) .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) .set_filter(filter.map_or_else(Filter::default, Into::into)) @@ -260,449 +81,627 @@ impl Compressor { let mut dparams = DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); - let mut storage = Storage::default() + let storage = Storage::default() .set_contiguous(true) .set_cparams(&mut cparams) .set_dparams(&mut dparams); - if let Some(pth) = path { - storage = storage.set_urlpath(pth).map_err(CompressionError::from_err)?; - } - - let schunk = SChunk::new(storage); - Ok(Self(Some(schunk))) - } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: BytesType) -> PyResult { - match self.0.as_mut() { - Some(schunk) => schunk - .append_buffer(input.as_bytes()) - .map_err(CompressionError::from_err), - None => Err(CompressionError::new_err("Compressor has been consumed")), + if let BytesType::RustyFile(_file) = output { + return Err(pyo3::exceptions::PyNotImplementedError::new_err( + "Output to File w/ blosc2 not implemented yet.", + )); + // storage = storage + // .set_urlpath(&file.borrow().path) + // .map_err(CompressionError::from_err)?; } - } - /// Flush and return current compressed stream, if file-backed Schunk, - /// then empty buf is returned - pub fn flush(&mut self) -> PyResult { - match self.0.as_ref() { - Some(schunk) => { - let buf = schunk.frame().map_err(CompressionError::from_err)?; - Ok(RustyBuffer::from(buf.to_vec())) + let mut schunk = SChunk::new(storage); + io::copy(&mut BufReader::new(input), &mut schunk)?; + + let nbytes = schunk.frame().map_err(CompressionError::from_err)?.len(); + match output { + BytesType::RustyFile(_) => Ok(nbytes), + _ => { + let schunk_buf = schunk.into_vec().map_err(CompressionError::from_err)?; + io::copy(&mut Cursor::new(schunk_buf), &mut output)?; + Ok(nbytes) } - None => Err(CompressionError::new_err("Compressor has been consumed")), } } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. - pub fn finish(&mut self) -> PyResult { - match std::mem::take(&mut self.0) { - Some(schunk) => schunk - .into_vec() - .map_err(CompressionError::from_err) - .map(RustyBuffer::from), - None => Err(CompressionError::new_err("Compressor has been consumed")), + /// Decompress a SChunk into buffer + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (input, output_len=None))] + pub fn decompress(py: Python, input: BytesType, output_len: Option) -> PyResult { + if input.is_empty() { + return Ok(RustyBuffer::from(vec![])); } + return crate::generic!(py, libcramjam::blosc2::decompress[input], output_len = output_len) + .map_err(DecompressionError::from_err); } -} -crate::make_decompressor!(blosc2); - -/// Represents a single compressed 'chunk' of data. 
Analogous to Lz4 block or snappy's raw format in Blosc2 -#[pyclass(name = "Chunk")] -pub struct PyChunk(Chunk); - -#[pymethods] -impl PyChunk { - /// Construct a Chunk from compressing - #[classmethod] - pub fn compress( - _cls: &PyAny, - src: BytesType, + /// decompress into output + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + if input.is_empty() { + return Ok(0); + } + crate::generic!(py, libcramjam::blosc2::decompress[input, output]).map_err(DecompressionError::from_err) + } + + /// Blosc2 decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.blosc2.decompress(compressed_bytes, output_len=Optional[None]) + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress_chunk(py: Python, data: BytesType, output_len: Option) -> PyResult { + let bytes = data.as_bytes(); + let buf = py + .allow_threads(|| libcramjam::blosc2::decompress_chunk(bytes)) + .map(RustyBuffer::from)?; + Ok(buf) + } + + /// Decompress a Chunk into output + #[pyfunction] + pub fn decompress_chunk_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + let bytes = input.as_bytes(); + let out = output.as_bytes_mut()?; + let nbytes = py.allow_threads(|| libcramjam::blosc2::decompress_chunk_into(bytes, out))?; + Ok(nbytes) + } + + /// Blosc2 compression, chunk format + /// + /// Python Example + /// -------------- + /// ```python + /// >>> _ = cramjam.blosc2.compress(b'some bytes here', typesize=1, clevel=CLevel.Nine, filter=Filter.Shuffle, codec=Codec.BloscLz) + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, typesize=None, clevel=None, filter=None, codec=None))] + pub fn compress_chunk( + py: Python, + data: BytesType, typesize: Option, clevel: Option, filter: Option, codec: Option, - ) -> PyResult { - let typesize = typesize.or_else(|| Some(src.itemsize())); - let clevel = clevel.map(Into::into); - let filter = filter.map(Into::into); - let codec = codec.map(Into::into); - let chunk = - Chunk::compress(src.as_bytes(), typesize, clevel, filter, codec).map_err(CompressionError::from_err)?; - Ok(Self(chunk)) + ) -> PyResult { + let bytes = data.as_bytes(); + py.allow_threads(|| { + let clevel = clevel.map(Into::into); + let filter = filter.map(Into::into); + let codec = codec.map(Into::into); + libcramjam::blosc2::blosc2::compress(bytes, typesize, clevel, filter, codec) + }) + .map_err(CompressionError::from_err) + .map(RustyBuffer::from) } - /// Decompress this chunk into bytes buffer - pub fn decompress(&self) -> PyResult { - self.0 - .decompress() - .map_err(DecompressionError::from_err) - .map(RustyBuffer::from) + /// Compress a Chunk into output + #[pyfunction] + #[pyo3(signature = (input, output, typesize=None, clevel=None, filter=None, codec=None))] + pub fn compress_chunk_into( + py: Python, + input: BytesType, + mut output: BytesType, + typesize: Option, + clevel: Option, + filter: Option, + codec: Option, + ) -> PyResult { + let bytes = input.as_bytes(); + let out = output.as_bytes_mut()?; + py.allow_threads(|| { + let clevel = clevel.map(Into::into); + let filter = filter.map(Into::into); + let codec = codec.map(Into::into); + libcramjam::blosc2::blosc2::compress_into(bytes, out, typesize, clevel, filter, codec) + }) + .map_err(CompressionError::from_err) } - /// Get raw bytes of this Chunk - pub fn raw(&self) -> PyResult<&[u8]> { - self.0 - .as_slice() - .map_err(|e| 
exceptions::PyBufferError::new_err(e.to_string())) - } + /// A Compressor interface, using blosc2's SChunk + #[pyclass] + #[derive(Clone)] + pub struct Compressor(Option); + + unsafe impl Send for Compressor {} + + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (path=None, typesize=None, clevel=None, filter=None, codec=None, nthreads=None))] + pub fn __init__( + path: Option, + typesize: Option, + clevel: Option, + filter: Option, + codec: Option, + nthreads: Option, + ) -> PyResult { + let mut cparams = CParams::from_typesize(typesize.unwrap_or(1)) + .set_codec(codec.map_or_else(Codec::default, Into::into)) + .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) + .set_filter(filter.map_or_else(Filter::default, Into::into)) + .set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); + let mut dparams = + DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); + + let mut storage = Storage::default() + .set_contiguous(true) + .set_cparams(&mut cparams) + .set_dparams(&mut dparams); + if let Some(pth) = path { + storage = storage.set_urlpath(pth).map_err(CompressionError::from_err)?; + } - /// repr - pub fn __repr__(&self) -> PyResult { - let ratio = self.0.compression_ratio().map_err(CompressionError::from_err)?; - let info = self.0.info().map_err(CompressionError::from_err)?; - let repr = format!( - "Chunk", - info.cbytes(), - info.nbytes() - ); - Ok(repr) - } -} + let schunk = SChunk::new(storage); + Ok(Self(Some(schunk))) + } -/// SChunk interface -#[pyclass(name = "SChunk")] -pub struct PySChunk { - schunk: SChunk, - from_bytes_cb: Option, - to_bytes_cb: Option, -} + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: BytesType) -> PyResult { + match self.0.as_mut() { + Some(schunk) => schunk + .append_buffer(input.as_bytes()) + .map_err(CompressionError::from_err), + None => Err(CompressionError::new_err("Compressor has been consumed")), + } + } -unsafe impl Send for PySChunk {} - -// Trampoline function from PySChunk, since generics not allowed. 
-// Call a function on PyObject which may be BytesType, or have a `converter` python function to convert -// the input PyObject `buf` into a `BytesType`, then call the intended operation on the bytes -#[inline] -fn try_to_bytes_with_op(py: Python, buf: PyObject, converter: Option<&PyObject>, op: F) -> PyResult -where - F: FnOnce(&[u8]) -> PyResult, -{ - match buf.extract::(py) { - Ok(bt) => op(bt.as_bytes()), - Err(_) => { - if let Some(to_bytes_cb) = &converter { - let obj = to_bytes_cb.call(py, (&buf,), None)?; - let bytestype = obj.extract::(py)?; - op(bytestype.as_bytes()) - } else { - let msg = "Could not convert to variant of `BytesType` and no `to_bytes_cb` function set"; - return Err(CompressionError::new_err(msg)); + /// Flush and return current compressed stream, if file-backed Schunk, + /// then empty buf is returned + pub fn flush(&mut self) -> PyResult { + match self.0.as_ref() { + Some(schunk) => { + let buf = schunk.frame().map_err(CompressionError::from_err)?; + Ok(RustyBuffer::from(buf.to_vec())) + } + None => Err(CompressionError::new_err("Compressor has been consumed")), } } - } - .map_err(CompressionError::from_err) -} -/// Helper function to convert a RustyBuffer to some other PyObject -/// as defined by user callback converter function -#[inline] -fn maybe_convert_buffer(py: Python, buf: RustyBuffer, converter: Option<&PyObject>) -> PyResult { - match converter { - Some(convert) => convert.call(py, (buf,), None), - None => Ok(buf.into_py(py)), + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + match std::mem::take(&mut self.0) { + Some(schunk) => schunk + .into_vec() + .map_err(CompressionError::from_err) + .map(RustyBuffer::from), + None => Err(CompressionError::new_err("Compressor has been consumed")), + } + } } -} -#[pymethods] -impl PySChunk { - /// Construct a new SChunk - #[new] - pub fn __init__( - path: Option, - typesize: Option, - clevel: Option, - filter: Option, - codec: Option, - nthreads: Option, - from_bytes_cb: Option, - to_bytes_cb: Option, - ) -> PyResult { - let mut cparams = CParams::from_typesize(typesize.unwrap_or(1)) - .set_codec(codec.map_or_else(Codec::default, Into::into)) - .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) - .set_filter(filter.map_or_else(Filter::default, Into::into)) - .set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); - let mut dparams = - DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); + mod _decompressor { + use super::*; + crate::make_decompressor!(blosc2); + } + #[pymodule_export] + use _decompressor::Decompressor; + + /// Represents a single compressed 'chunk' of data. 
Analogous to Lz4 block or snappy's raw format in Blosc2 + #[pyclass(name = "Chunk")] + pub struct PyChunk(Chunk); + + #[pymethods] + impl PyChunk { + /// Construct a Chunk from compressing + #[classmethod] + #[pyo3(signature = (src, typesize=None, clevel=None, filter=None, codec=None))] + pub fn compress( + _cls: &Bound<'_, pyo3::types::PyType>, + src: BytesType, + typesize: Option, + clevel: Option, + filter: Option, + codec: Option, + ) -> PyResult { + let typesize = typesize.or_else(|| Some(src.itemsize())); + let clevel = clevel.map(Into::into); + let filter = filter.map(Into::into); + let codec = codec.map(Into::into); + let chunk = + Chunk::compress(src.as_bytes(), typesize, clevel, filter, codec).map_err(CompressionError::from_err)?; + Ok(Self(chunk)) + } - let mut storage = Storage::default() - .set_contiguous(true) - .set_cparams(&mut cparams) - .set_dparams(&mut dparams); - if let Some(pth) = path { - storage = storage.set_urlpath(pth).map_err(CompressionError::from_err)?; + /// Decompress this chunk into bytes buffer + pub fn decompress(&self) -> PyResult { + self.0 + .decompress() + .map_err(DecompressionError::from_err) + .map(RustyBuffer::from) } - let schunk = SChunk::new(storage); - Ok(Self { - schunk, - from_bytes_cb, - to_bytes_cb, - }) - } + /// Get raw bytes of this Chunk + pub fn raw(&self) -> PyResult<&[u8]> { + self.0 + .as_slice() + .map_err(|e| exceptions::PyBufferError::new_err(e.to_string())) + } - /// Create a `SChunk` from `Compressor` - #[classmethod] - pub fn from_compressor(_cls: &PyAny, compressor: &PyAny) -> PyResult { - let compressor: Compressor = compressor.extract()?; - match compressor.0.as_ref() { - Some(inner) => Ok(Self { - schunk: inner.clone(), - from_bytes_cb: None, - to_bytes_cb: None, - }), - None => Err(exceptions::PyValueError::new_err( - "Provided compressor has been consumed", - )), + /// repr + pub fn __repr__(&self) -> PyResult { + let ratio = self.0.compression_ratio().map_err(CompressionError::from_err)?; + let info = self.0.info().map_err(CompressionError::from_err)?; + let repr = format!( + "Chunk", + info.cbytes(), + info.nbytes() + ); + Ok(repr) } } - /// Get a Compressor interface to this SChunk - pub fn as_compressor(&self) -> Compressor { - Compressor(Some(self.schunk.clone())) + /// SChunk interface + #[pyclass(name = "SChunk")] + pub struct PySChunk { + schunk: SChunk, + from_bytes_cb: Option, + to_bytes_cb: Option, } - /// Get a slice of decompressed data - pub fn get_slice_buffer(&self, start: usize, stop: usize) -> PyResult { - self.schunk - .get_slice_buffer(start, stop) - .map(RustyBuffer::from) - .map_err(CompressionError::from_err) - } + unsafe impl Send for PySChunk {} - /// Get the typsize of the SChunk's items - #[getter] - pub fn typesize(&self) -> usize { - self.schunk.typesize() + // Trampoline function from PySChunk, since generics not allowed. 
+ // Call a function on PyObject which may be BytesType, or have a `converter` python function to convert + // the input PyObject `buf` into a `BytesType`, then call the intended operation on the bytes + #[inline] + fn try_to_bytes_with_op(py: Python, buf: PyObject, converter: Option<&PyObject>, op: F) -> PyResult + where + F: FnOnce(&[u8]) -> PyResult, + { + match buf.extract::(py) { + Ok(bt) => op(bt.as_bytes()), + Err(_) => { + if let Some(to_bytes_cb) = &converter { + let obj = to_bytes_cb.call_bound(py, (&buf,), None)?; + let bytestype = obj.extract::(py)?; + op(bytestype.as_bytes()) + } else { + let msg = "Could not convert to variant of `BytesType` and no `to_bytes_cb` function set"; + return Err(CompressionError::new_err(msg)); + } + } + } + .map_err(CompressionError::from_err) } - /// Number of uncompressed bytes - #[getter] - pub fn nbytes(&self) -> usize { - self.schunk.nbytes() + /// Helper function to convert a RustyBuffer to some other PyObject + /// as defined by user callback converter function + #[inline] + fn maybe_convert_buffer(py: Python, buf: RustyBuffer, converter: Option<&PyObject>) -> PyResult { + match converter { + Some(convert) => convert.call_bound(py, (buf,), None), + None => Ok(buf.into_py(py)), + } } - /// Number of compressed bytes - #[getter] - pub fn cbytes(&self) -> usize { - self.schunk.cbytes() - } + #[pymethods] + impl PySChunk { + /// Construct a new SChunk + #[new] + #[pyo3(signature = ( + path=None, + typesize=None, + clevel=None, + filter=None, + codec=None, + nthreads=None, + from_bytes_cb=None, + to_bytes_cb=None, + ))] + pub fn __init__( + path: Option, + typesize: Option, + clevel: Option, + filter: Option, + codec: Option, + nthreads: Option, + from_bytes_cb: Option, + to_bytes_cb: Option, + ) -> PyResult { + let mut cparams = CParams::from_typesize(typesize.unwrap_or(1)) + .set_codec(codec.map_or_else(Codec::default, Into::into)) + .set_clevel(clevel.map_or_else(CLevel::default, Into::into)) + .set_filter(filter.map_or_else(Filter::default, Into::into)) + .set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); + let mut dparams = + DParams::default().set_nthreads(nthreads.unwrap_or_else(libcramjam::blosc2::blosc2::get_nthreads)); + + let mut storage = Storage::default() + .set_contiguous(true) + .set_cparams(&mut cparams) + .set_dparams(&mut dparams); + if let Some(pth) = path { + storage = storage.set_urlpath(pth).map_err(CompressionError::from_err)?; + } - /// Get number of chunks in this SChunk - #[getter] - pub fn nchunks(&self) -> usize { - self.schunk.n_chunks() - } + let schunk = SChunk::new(storage); + Ok(Self { + schunk, + from_bytes_cb, + to_bytes_cb, + }) + } - /// Current compression ratio - #[getter] - pub fn compression_ratio(&self) -> f32 { - self.schunk.compression_ratio() - } + /// Create a `SChunk` from `Compressor` + #[classmethod] + pub fn from_compressor( + _cls: &Bound<'_, pyo3::types::PyType>, + compressor: &Bound<'_, pyo3::types::PyType>, + ) -> PyResult { + let compressor: Compressor = compressor.extract()?; + match compressor.0.as_ref() { + Some(inner) => Ok(Self { + schunk: inner.clone(), + from_bytes_cb: None, + to_bytes_cb: None, + }), + None => Err(exceptions::PyValueError::new_err( + "Provided compressor has been consumed", + )), + } + } - /// Get the SChunk file path, if any. 
- #[getter] - pub fn path(&self) -> Option { - self.schunk.path() - } + /// Get a Compressor interface to this SChunk + pub fn as_compressor(&self) -> Compressor { + Compressor(Some(self.schunk.clone())) + } - /// Append/compress a buffer into this SChunk, returning the new number of chunks - pub fn append_buffer(&mut self, py: Python, buf: PyObject) -> PyResult { - try_to_bytes_with_op(py, buf, self.to_bytes_cb.as_ref(), |bytes| { - self.schunk.append_buffer(bytes).map_err(CompressionError::from_err) - }) - } + /// Get a slice of decompressed data + pub fn get_slice_buffer(&self, start: usize, stop: usize) -> PyResult { + self.schunk + .get_slice_buffer(start, stop) + .map(RustyBuffer::from) + .map_err(CompressionError::from_err) + } - /// Decompress a specific chunk - pub fn decompress_chunk(&mut self, py: Python, nchunk: usize) -> PyResult { - self.schunk - .decompress_chunk_vec(nchunk) - .map_err(DecompressionError::from_err) - .map(RustyBuffer::from) - .and_then(|buf| maybe_convert_buffer(py, buf, self.from_bytes_cb.as_ref())) - } + /// Get the typsize of the SChunk's items + #[getter] + pub fn typesize(&self) -> usize { + self.schunk.typesize() + } - /// Get a specific Chunk from this SChunk - pub fn get_chunk(&self, nchunk: usize) -> PyResult { - self.schunk - .get_chunk(nchunk) - .map_err(CompressionError::from_err) - .map(PyChunk) - } + /// Number of uncompressed bytes + #[getter] + pub fn nbytes(&self) -> usize { + self.schunk.nbytes() + } - /// Return the current _raw_ SChunk frame data - pub fn frame(&self) -> PyResult<&[u8]> { - self.schunk.frame().map_err(CompressionError::from_err) - } + /// Number of compressed bytes + #[getter] + pub fn cbytes(&self) -> usize { + self.schunk.cbytes() + } + + /// Get number of chunks in this SChunk + #[getter] + pub fn nchunks(&self) -> usize { + self.schunk.n_chunks() + } + + /// Current compression ratio + #[getter] + pub fn compression_ratio(&self) -> f32 { + self.schunk.compression_ratio() + } - /// Get a slice of SChunk (uncompressed) - pub fn __getitem__(&self, py: Python, slice: &PySlice) -> PyResult { - let indices = slice.indices(self.len() as _)?; - self.schunk - .get_slice_buffer(indices.start as _, indices.stop as _) - .map(|buf| { - buf.chunks_exact(self.typesize()) - .step_by(indices.step as _) - .flatten() - .map(Clone::clone) - .collect::>() + /// Get the SChunk file path, if any. 
+ #[getter] + pub fn path(&self) -> Option { + self.schunk.path() + } + + /// Append/compress a buffer into this SChunk, returning the new number of chunks + pub fn append_buffer(&mut self, py: Python, buf: PyObject) -> PyResult { + try_to_bytes_with_op(py, buf, self.to_bytes_cb.as_ref(), |bytes| { + self.schunk.append_buffer(bytes).map_err(CompressionError::from_err) }) - .map(RustyBuffer::from) - .map_err(DecompressionError::from_err) - .and_then(|buf| maybe_convert_buffer(py, buf, self.from_bytes_cb.as_ref())) - } + } - /// Set a slice of the SChunk (will compress data given) - pub fn __setitem__(&self, py: Python, slice: &PySlice, buf: PyObject) -> PyResult<()> { - let indices = slice.indices(self.len() as _)?; - if indices.step != 1 { - return Err(CompressionError::new_err( - "Setting with a step other than 1 not implemented", - )); + /// Decompress a specific chunk + pub fn decompress_chunk(&mut self, py: Python, nchunk: usize) -> PyResult { + self.schunk + .decompress_chunk_vec(nchunk) + .map_err(DecompressionError::from_err) + .map(RustyBuffer::from) + .and_then(|buf| maybe_convert_buffer(py, buf, self.from_bytes_cb.as_ref())) } - try_to_bytes_with_op(py, buf, self.to_bytes_cb.as_ref(), |bytes| { + + /// Get a specific Chunk from this SChunk + pub fn get_chunk(&self, nchunk: usize) -> PyResult { self.schunk - .set_slice_buffer(indices.start as _, indices.stop as _, bytes) + .get_chunk(nchunk) .map_err(CompressionError::from_err) - }) - } + .map(PyChunk) + } - /// Length (in items size) of SChunk - pub fn len(&self) -> usize { - self.schunk.len() - } + /// Return the current _raw_ SChunk frame data + pub fn frame(&self) -> PyResult<&[u8]> { + self.schunk.frame().map_err(CompressionError::from_err) + } - /// Numer of items in this SChunk - pub fn __len__(&self) -> usize { - self.len() - } + /// Get a slice of SChunk (uncompressed) + pub fn __getitem__(&self, py: Python, slice: &Bound<'_, PySlice>) -> PyResult { + let indices = slice.indices(self.len() as _)?; + self.schunk + .get_slice_buffer(indices.start as _, indices.stop as _) + .map(|buf| { + buf.chunks_exact(self.typesize()) + .step_by(indices.step as _) + .flatten() + .map(Clone::clone) + .collect::>() + }) + .map(RustyBuffer::from) + .map_err(DecompressionError::from_err) + .and_then(|buf| maybe_convert_buffer(py, buf, self.from_bytes_cb.as_ref())) + } - /// Repr for SChunk - pub fn __repr__(&self) -> String { - let len = self.schunk.len(); - let ratio = self.schunk.compression_ratio(); - let nchunks = self.schunk.n_chunks(); - let nbytes = self.schunk.nbytes(); - let cbytes = self.schunk.cbytes(); - format!("SChunk") - } -} + /// Set a slice of the SChunk (will compress data given) + pub fn __setitem__(&self, py: Python, slice: &Bound<'_, PySlice>, buf: PyObject) -> PyResult<()> { + let indices = slice.indices(self.len() as _)?; + if indices.step != 1 { + return Err(CompressionError::new_err( + "Setting with a step other than 1 not implemented", + )); + } + try_to_bytes_with_op(py, buf, self.to_bytes_cb.as_ref(), |bytes| { + self.schunk + .set_slice_buffer(indices.start as _, indices.stop as _, bytes) + .map_err(CompressionError::from_err) + }) + } -#[pyclass(name = "Filter")] -#[allow(missing_docs)] -#[derive(Clone)] -pub enum PyFilter { - NoFilter, - Shuffle, - BitShuffle, - Delta, - TruncPrec, - LastFilter, - LastRegisteredFilter, -} + /// Length (in items size) of SChunk + pub fn len(&self) -> usize { + self.schunk.len() + } -impl Into for PyFilter { - #[inline] - fn into(self) -> Filter { - match self { - Self::NoFilter 
=> Filter::NoFilter, - Self::Shuffle => Filter::Shuffle, - Self::BitShuffle => Filter::BitShuffle, - Self::Delta => Filter::Delta, - Self::TruncPrec => Filter::TruncPrec, - Self::LastFilter => Filter::LastFilter, - Self::LastRegisteredFilter => Filter::LastRegisteredFilter, + /// Numer of items in this SChunk + pub fn __len__(&self) -> usize { + self.len() } - } -} -#[pyclass(name = "CLevel")] -#[allow(missing_docs)] -#[derive(Clone)] -pub enum PyCLevel { - Zero, - One, - Two, - Three, - Four, - Five, - Six, - Seven, - Eight, - Nine, -} + /// Repr for SChunk + pub fn __repr__(&self) -> String { + let len = self.schunk.len(); + let ratio = self.schunk.compression_ratio(); + let nchunks = self.schunk.n_chunks(); + let nbytes = self.schunk.nbytes(); + let cbytes = self.schunk.cbytes(); + format!( + "SChunk" + ) + } + } -impl Into for PyCLevel { - #[inline] - fn into(self) -> CLevel { - match self { - Self::Zero => CLevel::Zero, - Self::One => CLevel::One, - Self::Two => CLevel::Two, - Self::Three => CLevel::Three, - Self::Four => CLevel::Four, - Self::Five => CLevel::Five, - Self::Six => CLevel::Six, - Self::Seven => CLevel::Seven, - Self::Eight => CLevel::Eight, - Self::Nine => CLevel::Nine, + #[pyclass(name = "Filter", eq, eq_int)] + #[allow(missing_docs)] + #[derive(Clone, PartialEq)] + pub enum PyFilter { + NoFilter, + Shuffle, + BitShuffle, + Delta, + TruncPrec, + LastFilter, + LastRegisteredFilter, + } + + impl Into for PyFilter { + #[inline] + fn into(self) -> Filter { + match self { + Self::NoFilter => Filter::NoFilter, + Self::Shuffle => Filter::Shuffle, + Self::BitShuffle => Filter::BitShuffle, + Self::Delta => Filter::Delta, + Self::TruncPrec => Filter::TruncPrec, + Self::LastFilter => Filter::LastFilter, + Self::LastRegisteredFilter => Filter::LastRegisteredFilter, + } } } -} -#[pyclass(name = "Codec")] -#[allow(missing_docs)] -#[derive(Clone)] -pub enum PyCodec { - BloscLz, - LZ4, - LZ4HC, - ZLIB, - ZSTD, - LastCodec, - LastRegisteredCodec, -} + #[pyclass(name = "CLevel", eq, eq_int)] + #[allow(missing_docs)] + #[derive(Clone, PartialEq)] + pub enum PyCLevel { + Zero, + One, + Two, + Three, + Four, + Five, + Six, + Seven, + Eight, + Nine, + } + + impl Into for PyCLevel { + #[inline] + fn into(self) -> CLevel { + match self { + Self::Zero => CLevel::Zero, + Self::One => CLevel::One, + Self::Two => CLevel::Two, + Self::Three => CLevel::Three, + Self::Four => CLevel::Four, + Self::Five => CLevel::Five, + Self::Six => CLevel::Six, + Self::Seven => CLevel::Seven, + Self::Eight => CLevel::Eight, + Self::Nine => CLevel::Nine, + } + } + } -impl Into for PyCodec { - #[inline] - fn into(self) -> Codec { - match self { - Self::BloscLz => Codec::BloscLz, - Self::LZ4 => Codec::LZ4, - Self::LZ4HC => Codec::LZ4HC, - Self::ZLIB => Codec::ZLIB, - Self::ZSTD => Codec::ZSTD, - Self::LastCodec => Codec::LastCodec, - Self::LastRegisteredCodec => Codec::LastRegisteredCodec, + #[pyclass(name = "Codec", eq, eq_int)] + #[allow(missing_docs)] + #[derive(Clone, PartialEq)] + pub enum PyCodec { + BloscLz, + LZ4, + LZ4HC, + ZLIB, + ZSTD, + LastCodec, + LastRegisteredCodec, + } + + impl Into for PyCodec { + #[inline] + fn into(self) -> Codec { + match self { + Self::BloscLz => Codec::BloscLz, + Self::LZ4 => Codec::LZ4, + Self::LZ4HC => Codec::LZ4HC, + Self::ZLIB => Codec::ZLIB, + Self::ZSTD => Codec::ZSTD, + Self::LastCodec => Codec::LastCodec, + Self::LastRegisteredCodec => Codec::LastRegisteredCodec, + } } } -} -/// Set number of threads, returning previous number -#[pyfunction] -pub fn set_nthreads(n: 
usize) -> usize { - libcramjam::blosc2::blosc2::set_nthreads(n) -} + /// Set number of threads, returning previous number + #[pyfunction] + pub fn set_nthreads(n: usize) -> usize { + libcramjam::blosc2::blosc2::set_nthreads(n) + } -/// get current number of threads set -#[pyfunction] -pub fn get_nthreads() -> usize { - libcramjam::blosc2::blosc2::get_nthreads() -} + /// get current number of threads set + #[pyfunction] + pub fn get_nthreads() -> usize { + libcramjam::blosc2::blosc2::get_nthreads() + } -/// Print the blosc2 library version -#[pyfunction] -pub fn get_version() -> PyResult { - let version = - libcramjam::blosc2::blosc2::get_version_string().map_err(|e| PyRuntimeError::new_err(e.to_string()))?; - Ok(format!("{}", version)) -} + /// Print the blosc2 library version + #[pyfunction] + pub fn get_version() -> PyResult { + let version = + libcramjam::blosc2::blosc2::get_version_string().map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + Ok(format!("{}", version)) + } -/// Get the max compressed size of some raw input length in bytes. -#[pyfunction] -pub fn max_compressed_len(len_bytes: usize) -> usize { - libcramjam::blosc2::blosc2::max_compress_len_bytes(len_bytes) + /// Get the max compressed size of some raw input length in bytes. + #[pyfunction] + pub fn max_compressed_len(len_bytes: usize) -> usize { + libcramjam::blosc2::blosc2::max_compress_len_bytes(len_bytes) + } } diff --git a/src/brotli.rs b/src/brotli.rs index 444d640a..28407d77 100644 --- a/src/brotli.rs +++ b/src/brotli.rs @@ -1,97 +1,106 @@ //! brotli de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::RustyBuffer; -use crate::{AsBytes, BytesType}; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::{Cursor, Write}; -const DEFAULT_COMPRESSION_LEVEL: u32 = 11; -const BUF_SIZE: usize = 1 << 17; // Taken from brotli kCompressFragementTwoPassBlockSize -const LGWIN: u32 = 22; +/// brotli de/compression interface +#[pymodule] +pub mod brotli { -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -/// Brotli decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.brotli.decompress(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::brotli::decompress[data], output_len = output_len) - .map_err(DecompressionError::from_err) -} - -/// Brotli compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.brotli.compress(b'some bytes here', level=9, output_len=Option[int]) # level defaults to 11 -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, level: Option, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::brotli::compress[data], output_len = output_len, level) - .map_err(CompressionError::from_err) -} + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::RustyBuffer; + use crate::{AsBytes, BytesType}; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::{Cursor, Write}; -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { - crate::generic!(py, libcramjam::brotli::compress[input, output], level).map_err(CompressionError::from_err) -} + const DEFAULT_COMPRESSION_LEVEL: u32 = 11; + const BUF_SIZE: usize = 1 << 17; // Taken from brotli kCompressFragementTwoPassBlockSize + const LGWIN: u32 = 22; -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::brotli::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// Brotli decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.brotli.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::brotli::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// Brotli Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + /// Brotli compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.brotli.compress(b'some bytes here', level=9, output_len=Option[int]) # level defaults to 11 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::brotli::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. - #[new] - pub fn __init__(level: Option) -> PyResult { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let inner = libcramjam::brotli::brotli::CompressorWriter::new(Cursor::new(vec![]), BUF_SIZE, level, LGWIN); - Ok(Self { inner: Some(inner) }) + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::brotli::compress[input, output], level).map_err(CompressionError::from_err) } - /// Compress input into the current compressor's stream. 
- pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::brotli::decompress[input, output]).map_err(DecompressionError::from_err) } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + /// Brotli Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. - pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |mut inner| { - inner.flush().map(|_| inner.into_inner().into_inner()) - }) + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); + let inner = libcramjam::brotli::brotli::CompressorWriter::new(Cursor::new(vec![]), BUF_SIZE, level, LGWIN); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |mut inner| { + inner.flush().map(|_| inner.into_inner().into_inner()) + }) + } + } + mod _decompressor { + use super::*; + crate::make_decompressor!(brotli); } + #[pymodule_export] + use _decompressor::Decompressor; } - -crate::make_decompressor!(brotli); diff --git a/src/bzip2.rs b/src/bzip2.rs index ec66a5f6..5fddf08c 100644 --- a/src/bzip2.rs +++ b/src/bzip2.rs @@ -1,94 +1,104 @@ //! bzip2 de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::RustyBuffer; -use crate::{AsBytes, BytesType}; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::Cursor; -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +/// bzip2 de/compression interface +#[pymodule] +pub mod bzip2 { -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -/// bzip2 decompression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.bzip2.decompress(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::bzip2::decompress[data], output_len = output_len) - .map_err(DecompressionError::from_err) -} + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::RustyBuffer; + use crate::{AsBytes, BytesType}; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; -/// bzip2 compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.bzip2.compress(b'some bytes here', level=6, output_len=Option[int]) # level defaults to 6 -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, level: Option, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::bzip2::compress[data], output_len = output_len, level) - .map_err(CompressionError::from_err) -} + const DEFAULT_COMPRESSION_LEVEL: u32 = 6; -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { - crate::generic!(py, libcramjam::bzip2::compress[input, output], level).map_err(CompressionError::from_err) -} + /// bzip2 decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.bzip2.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::bzip2::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::bzip2::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// bzip2 compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.bzip2.compress(b'some bytes here', level=6, output_len=Option[int]) # level defaults to 6 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::bzip2::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } -/// bzip2 Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::bzip2::compress[input, output], level).map_err(CompressionError::from_err) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. 
- #[new] - pub fn __init__(level: Option) -> PyResult { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let comp = libcramjam::bzip2::bzip2::Compression::new(level); - let inner = libcramjam::bzip2::bzip2::write::BzEncoder::new(Cursor::new(vec![]), comp); - Ok(Self { inner: Some(inner) }) + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::bzip2::decompress[input, output]).map_err(DecompressionError::from_err) } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// bzip2 Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); + let comp = libcramjam::bzip2::bzip2::Compression::new(level); + let inner = libcramjam::bzip2::bzip2::write::BzEncoder::new(Cursor::new(vec![]), comp); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. - pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + mod _decompressor { + use super::*; + crate::make_decompressor!(bzip2); } + #[pymodule_export] + use _decompressor::Decompressor; } - -crate::make_decompressor!(bzip2); diff --git a/src/deflate.rs b/src/deflate.rs index 584ff610..f64172db 100644 --- a/src/deflate.rs +++ b/src/deflate.rs @@ -1,94 +1,104 @@ //! deflate de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::RustyBuffer; -use crate::{AsBytes, BytesType}; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::Cursor; -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +/// deflate de/compression interface +#[pymodule] +pub mod deflate { -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -/// Deflate decompression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.deflate.decompress(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::deflate::decompress[data], output_len = output_len) - .map_err(DecompressionError::from_err) -} + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::RustyBuffer; + use crate::{AsBytes, BytesType}; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; -/// Deflate compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.deflate.compress(b'some bytes here', level=5, output_len=Optional[int]) # level defaults to 6 -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, level: Option, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::deflate::compress[data], output_len = output_len, level) - .map_err(CompressionError::from_err) -} + const DEFAULT_COMPRESSION_LEVEL: u32 = 6; -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { - crate::generic!(py, libcramjam::deflate::compress[input, output], level).map_err(CompressionError::from_err) -} + /// Deflate decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.deflate.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::deflate::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::deflate::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// Deflate compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.deflate.compress(b'some bytes here', level=5, output_len=Optional[int]) # level defaults to 6 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::deflate::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } -/// Deflate Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::deflate::compress[input, output], level).map_err(CompressionError::from_err) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. 
- #[new] - pub fn __init__(level: Option) -> PyResult { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let compression = libcramjam::deflate::flate2::Compression::new(level); - let inner = libcramjam::deflate::flate2::write::DeflateEncoder::new(Cursor::new(vec![]), compression); - Ok(Self { inner: Some(inner) }) + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::deflate::decompress[input, output]).map_err(DecompressionError::from_err) } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// Deflate Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); + let compression = libcramjam::deflate::flate2::Compression::new(level); + let inner = libcramjam::deflate::flate2::write::DeflateEncoder::new(Cursor::new(vec![]), compression); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. - pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + mod _decompressor { + use super::*; + crate::make_decompressor!(deflate); } + #[pymodule_export] + use _decompressor::Decompressor; } - -crate::make_decompressor!(deflate); diff --git a/src/experimental.rs b/src/experimental.rs index fcde38ab..2ba12fe9 100644 --- a/src/experimental.rs +++ b/src/experimental.rs @@ -2,16 +2,13 @@ //! This module makes no effort to maintain SemVer between //! releases. use pyo3::prelude::*; -use pyo3::PyResult; -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - Python::with_gil(|py| add_experimental_modules(py, m))?; - Ok(()) -} -fn add_experimental_modules(py: Python, m: &PyModule) -> PyResult<()> { - use crate::blosc2; - let sub_mod = PyModule::new(py, "blosc2")?; - blosc2::init_py_module(sub_mod)?; - m.add_submodule(sub_mod)?; - Ok(()) +/// Experimental and unstable implementations. +/// This module makes no effort to maintain SemVer between +/// releases. +#[pymodule] +pub mod experimental { + + #[pymodule_export] + use crate::blosc2::blosc2; } diff --git a/src/gzip.rs b/src/gzip.rs index 0a7bde30..b1dd7fab 100644 --- a/src/gzip.rs +++ b/src/gzip.rs @@ -1,96 +1,106 @@ //! 
gzip de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::{AsBytes, RustyBuffer}; -use crate::BytesType; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::Cursor; -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +/// gzip de/compression interface +#[pymodule] +pub mod gzip { -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -/// Gzip decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.gzip.decompress(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::gzip::decompress[data], output_len = output_len) - .map_err(DecompressionError::from_err) -} + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; -/// Gzip compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.gzip.compress(b'some bytes here', level=2, output_len=Optional[int]) # Level defaults to 6 -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, level: Option, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::gzip::compress[data], output_len = output_len, level) - .map_err(CompressionError::from_err) -} + const DEFAULT_COMPRESSION_LEVEL: u32 = 6; -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { - crate::generic!(py, libcramjam::gzip::compress[input, output], level).map_err(CompressionError::from_err) -} + /// Gzip decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.gzip.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::gzip::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::gzip::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// Gzip compression. 
+ /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.gzip.compress(b'some bytes here', level=2, output_len=Optional[int]) # Level defaults to 6 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::gzip::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } -/// GZIP Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::gzip::compress[input, output], level).map_err(CompressionError::from_err) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. - #[new] - pub fn __init__(level: Option) -> PyResult { - let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL); - let inner = libcramjam::gzip::flate2::write::GzEncoder::new( - Cursor::new(vec![]), - libcramjam::gzip::flate2::Compression::new(level), - ); - Ok(Self { inner: Some(inner) }) + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::gzip::decompress[input, output]).map_err(DecompressionError::from_err) } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// GZIP Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL); + let inner = libcramjam::gzip::flate2::write::GzEncoder::new( + Cursor::new(vec![]), + libcramjam::gzip::flate2::Compression::new(level), + ); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. 
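A small sketch pairing the gzip bindings above with the standard library, on the assumption that the `GzEncoder` output is an ordinary gzip stream; the sample data is made up.

```python
import gzip as std_gzip

import cramjam

data = b"some bytes here" * 100

# cramjam writes a regular gzip stream, so the stdlib can read it back
compressed = bytes(cramjam.gzip.compress(data, level=2))
assert std_gzip.decompress(compressed) == data

# and cramjam can decompress stdlib-produced gzip just as well
assert bytes(cramjam.gzip.decompress(std_gzip.compress(data))) == data
```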
- pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + mod _decompressor { + use super::*; + crate::make_decompressor!(gzip); } + #[pymodule_export] + use _decompressor::Decompressor; } - -crate::make_decompressor!(gzip); diff --git a/src/io.rs b/src/io.rs index 6d7527e5..0e88fb0c 100644 --- a/src/io.rs +++ b/src/io.rs @@ -69,6 +69,7 @@ impl RustyFile { /// b'tes' /// ``` #[new] + #[pyo3(signature = (path, read = None, write = None, truncate = None, append = None))] pub fn __init__( path: &str, read: Option, @@ -94,7 +95,8 @@ impl RustyFile { } /// Read from the file in its current position, returns `bytes`; optionally specify number of /// bytes to read. - pub fn read<'a>(&mut self, py: Python<'a>, n_bytes: Option) -> PyResult<&'a PyBytes> { + #[pyo3(signature = (n_bytes=None))] + pub fn read<'a>(&mut self, py: Python<'a>, n_bytes: Option) -> PyResult> { read(self, py, n_bytes) } /// Read from the file in its current position, into a [`BytesType`](../enum.BytesType.html) object. @@ -109,6 +111,7 @@ impl RustyFile { /// 1: from current stream position /// 2: from end of the stream /// ``` + #[pyo3(signature = (position, whence=None))] pub fn seek(&mut self, position: isize, whence: Option) -> PyResult { let pos = match whence.unwrap_or_else(|| 0) { 0 => SeekFrom::Start(position as u64), @@ -208,7 +211,7 @@ impl PythonBuffer { { Python::with_gil(|py| { let is_memoryview = unsafe { ffi::PyMemoryView_Check(self.owner.as_ptr()) } == 1; - if is_memoryview || self.owner.as_ref(py).is_instance_of::() { + if is_memoryview || self.owner.bind(py).is_instance_of::() { Err(pyo3::exceptions::PyTypeError::new_err( "With PyPy, an output of type `bytes` or `memoryview` does not work. See issue pypy/pypy#4918", )) @@ -252,14 +255,14 @@ impl Drop for PythonBuffer { } impl<'py> FromPyObject<'py> for PythonBuffer { - fn extract(obj: &'py PyAny) -> PyResult { + fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult { Self::try_from(obj) } } -impl<'py> TryFrom<&'py PyAny> for PythonBuffer { +impl<'a, 'py> TryFrom<&'a Bound<'py, PyAny>> for PythonBuffer { type Error = PyErr; - fn try_from(obj: &'py PyAny) -> Result { + fn try_from(obj: &'a Bound<'py, PyAny>) -> Result { let mut buf = Box::new(mem::MaybeUninit::uninit()); let rc = unsafe { ffi::PyObject_GetBuffer(obj.as_ptr(), buf.as_mut_ptr(), ffi::PyBUF_CONTIG_RO) }; if rc != 0 { @@ -368,6 +371,7 @@ impl From> for RustyBuffer { impl RustyBuffer { /// Instantiate the object, optionally with any supported bytes-like object in [BytesType](../enum.BytesType.html) #[new] + #[pyo3(signature = (data=None))] pub fn __init__(mut data: Option>) -> PyResult { let mut buf = vec![]; if let Some(bytes) = data.as_mut() { @@ -389,7 +393,8 @@ impl RustyBuffer { Ok(r as usize) } /// Read from the buffer in its current position, returns bytes; optionally specify number of bytes to read. - pub fn read<'a>(&mut self, py: Python<'a>, n_bytes: Option) -> PyResult<&'a PyBytes> { + #[pyo3(signature = (n_bytes=None))] + pub fn read<'a>(&mut self, py: Python<'a>, n_bytes: Option) -> PyResult> { read(self, py, n_bytes) } /// Read from the buffer in its current position, into a [BytesType](../enum.BytesType.html) object. 
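A quick sketch of the `read`/`seek` signatures touched in these hunks, exercised through `cramjam.Buffer`; `cramjam.File` exposes the same read/seek interface over a file path. The literal contents are illustrative only.

```python
import cramjam

buf = cramjam.Buffer(b"bytes")   # Buffer() with no data starts empty
print(buf.read(3))               # b'byt'; read(n_bytes=None) would read to the end
buf.seek(-2, 2)                  # whence=2 seeks relative to the end of the stream
print(buf.read())                # b'es'
buf.seek(0)                      # whence defaults to 0, i.e. absolute from the start
```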
@@ -403,6 +408,7 @@ impl RustyBuffer { /// 1: from current stream position /// 2: from end of the stream /// ``` + #[pyo3(signature = (position, whence=None))] pub fn seek(&mut self, position: isize, whence: Option) -> PyResult { let pos = match whence.unwrap_or_else(|| 0) { 0 => SeekFrom::Start(position as u64), @@ -506,16 +512,16 @@ fn write(input: &mut BytesType, output: &mut W) -> std::io::Result(reader: &mut R, py: Python<'a>, n_bytes: Option) -> PyResult<&'a PyBytes> { +fn read<'a, R: Read>(reader: &mut R, py: Python<'a>, n_bytes: Option) -> PyResult> { match n_bytes { - Some(n) => PyBytes::new_with(py, n, |buf| { + Some(n) => PyBytes::new_bound_with(py, n, |buf| { reader.read(buf)?; Ok(()) }), None => { let mut buf = vec![]; reader.read_to_end(&mut buf)?; - Ok(PyBytes::new(py, buf.as_slice())) + Ok(PyBytes::new_bound(py, buf.as_slice())) } } } diff --git a/src/lib.rs b/src/lib.rs index e9fb71a8..5e85ceef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,10 +80,10 @@ use std::io::{Read, Seek, SeekFrom, Write}; pub enum BytesType<'a> { /// [`cramjam.Buffer`](io/struct.RustyBuffer.html) #[pyo3(transparent, annotation = "Buffer")] - RustyBuffer(&'a PyCell), + RustyBuffer(Bound<'a, RustyBuffer>), /// [`cramjam.File`](io/struct.RustyFile.html) #[pyo3(transparent, annotation = "File")] - RustyFile(&'a PyCell), + RustyFile(Bound<'a, RustyFile>), /// `object` implementing the Buffer Protocol #[pyo3(transparent, annotation = "pybuffer")] PyBuffer(PythonBuffer), @@ -369,30 +369,48 @@ macro_rules! make_decompressor { }; } -macro_rules! make_submodule { - ($py:ident -> $parent:ident -> $submodule:ident) => { - let sub_mod = PyModule::new($py, stringify!($submodule))?; - $submodule::init_py_module(sub_mod)?; - $parent.add_submodule(sub_mod)?; - }; -} - #[pymodule] -fn cramjam(py: Python, m: &PyModule) -> PyResult<()> { - m.add("__version__", env!("CARGO_PKG_VERSION"))?; - m.add("CompressionError", py.get_type::())?; - m.add("DecompressionError", py.get_type::())?; - m.add_class::()?; - m.add_class::()?; - make_submodule!(py -> m -> snappy); - make_submodule!(py -> m -> brotli); - make_submodule!(py -> m -> bzip2); - make_submodule!(py -> m -> lz4); - make_submodule!(py -> m -> gzip); - make_submodule!(py -> m -> deflate); - make_submodule!(py -> m -> xz); - make_submodule!(py -> m -> zstd); - make_submodule!(py -> m -> experimental); +mod cramjam { + use super::*; + + #[pymodule_init] + fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("__version__", env!("CARGO_PKG_VERSION"))?; + m.add_class::()?; + m.add_class::()?; + Ok(()) + } + + #[pymodule_export] + use crate::CompressionError; + + #[pymodule_export] + use crate::DecompressionError; + + #[pymodule_export] + use crate::snappy::snappy; + + #[pymodule_export] + use crate::zstd::zstd; + + #[pymodule_export] + use crate::lz4::lz4; + + #[pymodule_export] + use crate::brotli::brotli; + + #[pymodule_export] + use crate::deflate::deflate; + + #[pymodule_export] + use crate::xz::xz; + + #[pymodule_export] + use crate::bzip2::bzip2; + + #[pymodule_export] + use crate::gzip::gzip; - Ok(()) + #[pymodule_export] + use crate::experimental::experimental; } diff --git a/src/lz4.rs b/src/lz4.rs index c9303183..c1aabbe6 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -1,250 +1,266 @@ //! 
LZ4 de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::{AsBytes, RustyBuffer}; -use crate::BytesType; -use libcramjam::lz4::lz4::{BlockMode, ContentChecksum}; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::Cursor; -const DEFAULT_COMPRESSION_LEVEL: u32 = 4; +/// LZ4 de/compression interface +#[pymodule] +pub mod lz4 { -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_block, m)?)?; - m.add_function(wrap_pyfunction!(decompress_block, m)?)?; + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use libcramjam::lz4::lz4::{BlockMode, ContentChecksum}; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_function(wrap_pyfunction!(compress_block_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_block_into, m)?)?; + const DEFAULT_COMPRESSION_LEVEL: u32 = 4; - m.add_function(wrap_pyfunction!(compress_block_bound, m)?)?; - - m.add_class::()?; - m.add_class::()?; - Ok(()) -} + /// LZ4 compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> # Note, output_len is currently ignored; underlying algorithm does not support reading to slice at this time + /// >>> cramjam.lz4.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::lz4::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// LZ4 compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> # Note, output_len is currently ignored; underlying algorithm does not support reading to slice at this time -/// >>> cramjam.lz4.decompress(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::lz4::decompress[data], output_len = output_len).map_err(DecompressionError::from_err) -} + /// LZ4 compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> # Note, output_len is currently ignored; underlying algorithm does not support reading to slice at this time + /// >>> cramjam.lz4.compress(b'some bytes here', output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::lz4::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } -/// LZ4 compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> # Note, output_len is currently ignored; underlying algorithm does not support reading to slice at this time -/// >>> cramjam.lz4.compress(b'some bytes here', output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, level: Option, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::lz4::compress[data], output_len = output_len, level) - .map_err(CompressionError::from_err) -} + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::lz4::compress[input, output], level).map_err(CompressionError::from_err) + } -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { - crate::generic!(py, libcramjam::lz4::compress[input, output], level).map_err(CompressionError::from_err) -} + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::lz4::decompress[input, output]).map_err(DecompressionError::from_err) + } -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::lz4::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// LZ4 _block_ decompression. + /// + /// `output_len` is optional, it's the upper bound length of decompressed data; if it's not provided, + /// then it's assumed `store_size=True` was used during compression and length will then be taken + /// from the header, otherwise it's assumed `store_size=False` was used and no prepended size exists in input + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.lz4.decompress_block(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress_block(py: Python, data: BytesType, output_len: Option) -> PyResult { + let bytes = data.as_bytes(); -/// LZ4 _block_ decompression. 
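A minimal round-trip through the LZ4 frame functions above; `level` defaults to 4 and, as the docstrings note, `output_len` is currently ignored for this codec. The sample data is arbitrary.

```python
import cramjam

data = b"some bytes here" * 100

compressed = cramjam.lz4.compress(data, level=4)        # LZ4 frame format
assert bytes(cramjam.lz4.decompress(compressed)) == data
```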
-/// -/// `output_len` is optional, it's the upper bound length of decompressed data; if it's not provided, -/// then it's assumed `store_size=True` was used during compression and length will then be taken -/// from the header, otherwise it's assumed `store_size=False` was used and no prepended size exists in input -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.lz4.decompress_block(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn decompress_block(py: Python, data: BytesType, output_len: Option) -> PyResult { - let bytes = data.as_bytes(); - - py.allow_threads(|| { - match output_len { - Some(n) => { - let mut buf = vec![0u8; n]; - libcramjam::lz4::block::decompress_into(bytes, &mut buf, Some(false)).map(|_| buf) + py.allow_threads(|| { + match output_len { + Some(n) => { + let mut buf = vec![0u8; n]; + libcramjam::lz4::block::decompress_into(bytes, &mut buf, Some(false)).map(|_| buf) + } + None => libcramjam::lz4::block::decompress_vec(bytes), } - None => libcramjam::lz4::block::decompress_vec(bytes), - } - .map_err(DecompressionError::from_err) + .map_err(DecompressionError::from_err) + .map(RustyBuffer::from) + }) + } + + /// LZ4 _block_ compression. + /// + /// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block) + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.lz4.compress_block( + /// ... b'some bytes here', + /// ... output_len=Optional[int], + /// ... mode=Option[str], + /// ... acceleration=Option[int], + /// ... compression=Option[int], + /// ... store_size=Option[bool] + /// ... ) + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, output_len=None, mode=None, acceleration=None, compression=None, store_size=None))] + pub fn compress_block( + py: Python, + data: BytesType, + output_len: Option, + mode: Option<&str>, + acceleration: Option, + compression: Option, + store_size: Option, + ) -> PyResult { + let bytes = data.as_bytes(); + py.allow_threads(|| { + libcramjam::lz4::block::compress_vec(bytes, compression.map(|v| v as _), acceleration, store_size) + }) + .map_err(CompressionError::from_err) .map(RustyBuffer::from) - }) -} + } -/// LZ4 _block_ compression. -/// -/// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block) -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.lz4.compress_block( -/// ... b'some bytes here', -/// ... output_len=Optional[int], -/// ... mode=Option[str], -/// ... acceleration=Option[int], -/// ... compression=Option[int], -/// ... store_size=Option[bool] -/// ... ) -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn compress_block( - py: Python, - data: BytesType, - output_len: Option, - mode: Option<&str>, - acceleration: Option, - compression: Option, - store_size: Option, -) -> PyResult { - let bytes = data.as_bytes(); - py.allow_threads(|| { - libcramjam::lz4::block::compress_vec(bytes, compression.map(|v| v as _), acceleration, store_size) - }) - .map_err(CompressionError::from_err) - .map(RustyBuffer::from) -} + /// LZ4 _block_ decompression into a pre-allocated buffer. 
+ /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.lz4.decompress_block_into(compressed_bytes, output_buffer) + /// ``` + #[pyfunction] + pub fn decompress_block_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + let bytes = input.as_bytes(); + let out_bytes = output.as_bytes_mut()?; + py.allow_threads(|| libcramjam::lz4::block::decompress_into(bytes, out_bytes, Some(true))) + .map_err(DecompressionError::from_err) + .map(|v| v as _) + } -/// LZ4 _block_ decompression into a pre-allocated buffer. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.lz4.decompress_block_into(compressed_bytes, output_buffer) -/// ``` -#[pyfunction] -pub fn decompress_block_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - let bytes = input.as_bytes(); - let out_bytes = output.as_bytes_mut()?; - py.allow_threads(|| libcramjam::lz4::block::decompress_into(bytes, out_bytes, Some(true))) - .map_err(DecompressionError::from_err) + /// LZ4 _block_ compression into pre-allocated buffer. + /// + /// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block) + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.lz4.compress_block_into( + /// ... b'some bytes here', + /// ... output=output_buffer, + /// ... mode=Option[str], + /// ... acceleration=Option[int], + /// ... compression=Option[int], + /// ... store_size=Option[bool] + /// ... ) + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, output, mode=None, acceleration=None, compression=None, store_size=None))] + pub fn compress_block_into( + py: Python, + data: BytesType, + mut output: BytesType, + mode: Option<&str>, + acceleration: Option, + compression: Option, + store_size: Option, + ) -> PyResult { + let bytes = data.as_bytes(); + let out_bytes = output.as_bytes_mut()?; + py.allow_threads(|| { + libcramjam::lz4::block::compress_into( + bytes, + out_bytes, + compression.map(|v| v as _), + acceleration, + store_size, + ) + }) + .map_err(CompressionError::from_err) .map(|v| v as _) -} + } -/// LZ4 _block_ compression into pre-allocated buffer. -/// -/// The kwargs mostly follow the same definition found in [python-lz4 block.compress](https://python-lz4.readthedocs.io/en/stable/lz4.block.html#module-lz4.block) -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.lz4.compress_block_into( -/// ... b'some bytes here', -/// ... output=output_buffer, -/// ... mode=Option[str], -/// ... acceleration=Option[int], -/// ... compression=Option[int], -/// ... store_size=Option[bool] -/// ... ) -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn compress_block_into( - py: Python, - data: BytesType, - mut output: BytesType, - mode: Option<&str>, - acceleration: Option, - compression: Option, - store_size: Option, -) -> PyResult { - let bytes = data.as_bytes(); - let out_bytes = output.as_bytes_mut()?; - py.allow_threads(|| { - libcramjam::lz4::block::compress_into(bytes, out_bytes, compression.map(|v| v as _), acceleration, store_size) - }) - .map_err(CompressionError::from_err) - .map(|v| v as _) -} + /// Determine the size of a buffer which is guaranteed to hold the result of block compression, will error if + /// data is too long to be compressed by LZ4. 
+ /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.lz4.compress_block_bound(b'some bytes here') + /// ``` + #[pyfunction] + pub fn compress_block_bound(src: BytesType) -> PyResult { + Ok(libcramjam::lz4::block::compress_bound(src.len(), Some(true))) + } -/// Determine the size of a buffer which is guaranteed to hold the result of block compression, will error if -/// data is too long to be compressed by LZ4. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.lz4.compress_block_bound(b'some bytes here') -/// ``` -#[pyfunction] -pub fn compress_block_bound(src: BytesType) -> PyResult { - Ok(libcramjam::lz4::block::compress_bound(src.len(), Some(true))) -} + /// lz4 Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, + } -/// lz4 Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None, content_checksum=None, block_linked=None))] + pub fn __init__( + level: Option, + content_checksum: Option, + block_linked: Option, + ) -> PyResult { + let inner = libcramjam::lz4::lz4::EncoderBuilder::new() + .auto_flush(true) + .level(level.unwrap_or(DEFAULT_COMPRESSION_LEVEL)) + .checksum(match content_checksum { + Some(false) => ContentChecksum::NoChecksum, + _ => ContentChecksum::ChecksumEnabled, + }) + .block_mode(match block_linked { + Some(false) => BlockMode::Independent, + _ => BlockMode::Linked, + }) + .build(Cursor::new(vec![]))?; + Ok(Self { inner: Some(inner) }) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. - #[new] - pub fn __init__(level: Option, content_checksum: Option, block_linked: Option) -> PyResult { - let inner = libcramjam::lz4::lz4::EncoderBuilder::new() - .auto_flush(true) - .level(level.unwrap_or(DEFAULT_COMPRESSION_LEVEL)) - .checksum(match content_checksum { - Some(false) => ContentChecksum::NoChecksum, - _ => ContentChecksum::ChecksumEnabled, - }) - .block_mode(match block_linked { - Some(false) => BlockMode::Independent, - _ => BlockMode::Linked, - }) - .build(Cursor::new(vec![]))?; - Ok(Self { inner: Some(inner) }) - } + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } - /// Compress input into the current compressor's stream. 
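A sketch of the block-level API above, assuming the default `store_size` behaviour prepends the decompressed size as the docstrings describe; the buffer sizes and sample data are illustrative.

```python
import cramjam

data = b"some bytes here" * 100

# default store_size prepends the decompressed size, so decompress_block
# can size its output from the header without an explicit output_len
blob = cramjam.lz4.compress_block(data)
assert bytes(cramjam.lz4.decompress_block(blob)) == data

# pre-allocated variant: compress_block_bound gives a safe output size
out = bytearray(cramjam.lz4.compress_block_bound(data))
n = cramjam.lz4.compress_block_into(data, out)
restored = bytearray(len(data))
m = cramjam.lz4.decompress_block_into(out[:n], restored)
assert restored[:m] == data
```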
- pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) - } + /// Flush and return current compressed stream + #[allow(mutable_transmutes)] // TODO: feature req to lz4 to get mut ref to writer + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| { + let writer = e.writer(); + // no other mutations to buf b/c it'll be truncated and return immediately after this + unsafe { std::mem::transmute::<&Cursor>, &mut Cursor>>(writer) } + }) + } - /// Flush and return current compressed stream - #[allow(mutable_transmutes)] // TODO: feature req to lz4 to get mut ref to writer - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| { - let writer = e.writer(); - // no other mutations to buf b/c it'll be truncated and return immediately after this - unsafe { std::mem::transmute::<&Cursor>, &mut Cursor>>(writer) } - }) + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| { + let (cursor, result) = inner.finish(); + result.map(|_| cursor.into_inner()) + }) + } } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. - pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| { - let (cursor, result) = inner.finish(); - result.map(|_| cursor.into_inner()) - }) + mod _decompressor { + use super::*; + crate::make_decompressor!(lz4); } + #[pymodule_export] + use _decompressor::Decompressor; } - -crate::make_decompressor!(lz4); diff --git a/src/snappy.rs b/src/snappy.rs index 9133c7af..b5512ad2 100644 --- a/src/snappy.rs +++ b/src/snappy.rs @@ -1,163 +1,162 @@ //! snappy de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::{AsBytes, RustyBuffer}; -use crate::BytesType; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::Cursor; - -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_raw, m)?)?; - m.add_function(wrap_pyfunction!(decompress_raw, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_function(wrap_pyfunction!(compress_raw_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_raw_into, m)?)?; - m.add_function(wrap_pyfunction!(compress_raw_max_len, m)?)?; - m.add_function(wrap_pyfunction!(decompress_raw_len, m)?)?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -/// Snappy decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> # bytes or bytearray; bytearray is faster -/// >>> cramjam.snappy.decompress(compressed_bytes, output_len=Optional[None]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::snappy::decompress[data], output_len = output_len) - .map_err(DecompressionError::from_err) -} -/// Snappy compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> _ = cramjam.snappy.compress(b'some bytes here') -/// >>> _ = cramjam.snappy.compress(bytearray(b'this avoids double allocation in rust side, and thus faster!')) # <- use bytearray where possible -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::snappy::compress[data], output_len = output_len).map_err(CompressionError::from_err) -} - -/// Snappy decompression, raw -/// This does not use the snappy 'framed' encoding of compressed bytes. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.snappy.decompress_raw(compressed_raw_bytes) -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn decompress_raw(py: Python, data: BytesType, output_len: Option) -> PyResult { - let bytes = data.as_bytes(); - py.allow_threads(|| libcramjam::snappy::raw::decompress_vec(bytes)) - .map_err(DecompressionError::from_err) - .map(From::from) -} +/// snappy de/compression interface +#[pymodule] +pub mod snappy { + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; + + /// Snappy decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> # bytes or bytearray; bytearray is faster + /// >>> cramjam.snappy.decompress(compressed_bytes, output_len=Optional[None]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::snappy::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// Snappy compression raw. -/// This does not use the snappy 'framed' encoding of compressed bytes. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.snappy.compress_raw(b'some bytes here') -/// ``` -#[pyfunction] -#[allow(unused_variables)] -pub fn compress_raw(py: Python, data: BytesType, output_len: Option) -> PyResult { - let bytes = data.as_bytes(); - py.allow_threads(|| libcramjam::snappy::raw::compress_vec(bytes)) - .map_err(CompressionError::from_err) - .map(From::from) -} + /// Snappy compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> _ = cramjam.snappy.compress(b'some bytes here') + /// >>> _ = cramjam.snappy.compress(bytearray(b'this avoids double allocation in rust side, and thus faster!')) # <- use bytearray where possible + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn compress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::snappy::compress[data], output_len = output_len) + .map_err(CompressionError::from_err) + } -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::snappy::compress[input, output]).map_err(CompressionError::from_err) -} + /// Snappy decompression, raw + /// This does not use the snappy 'framed' encoding of compressed bytes. 
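A short sketch contrasting the framed and raw snappy entry points in this module; both sides of a raw exchange must agree that no framing is used. Sample data is made up.

```python
import cramjam

data = b"some bytes here" * 100

# framed snappy: a self-describing stream
framed = cramjam.snappy.compress(data)
assert bytes(cramjam.snappy.decompress(framed)) == data

# raw snappy: bare blocks with no framing around them
raw = cramjam.snappy.compress_raw(data)
assert bytes(cramjam.snappy.decompress_raw(raw)) == data
```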
+ /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.snappy.decompress_raw(compressed_raw_bytes) + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress_raw(py: Python, data: BytesType, output_len: Option) -> PyResult { + let bytes = data.as_bytes(); + py.allow_threads(|| libcramjam::snappy::raw::decompress_vec(bytes)) + .map_err(DecompressionError::from_err) + .map(From::from) + } -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::snappy::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// Snappy compression raw. + /// This does not use the snappy 'framed' encoding of compressed bytes. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.snappy.compress_raw(b'some bytes here') + /// ``` + #[pyfunction] + #[allow(unused_variables)] + #[pyo3(signature = (data, output_len=None))] + pub fn compress_raw(py: Python, data: BytesType, output_len: Option) -> PyResult { + let bytes = data.as_bytes(); + py.allow_threads(|| libcramjam::snappy::raw::compress_vec(bytes)) + .map_err(CompressionError::from_err) + .map(From::from) + } -/// Compress raw format directly into an output buffer -#[pyfunction] -pub fn compress_raw_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - let bytes_in = input.as_bytes(); - let bytes_out = output.as_bytes_mut()?; - py.allow_threads(|| libcramjam::snappy::raw::compress(bytes_in, bytes_out)) - .map_err(CompressionError::from_err) -} + /// Compress directly into an output buffer + #[pyfunction] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::snappy::compress[input, output]).map_err(CompressionError::from_err) + } -/// Decompress raw format directly into an output buffer -#[pyfunction] -pub fn decompress_raw_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - let bytes_in = input.as_bytes(); - let bytes_out = output.as_bytes_mut()?; - py.allow_threads(|| libcramjam::snappy::raw::decompress(bytes_in, bytes_out)) - .map_err(DecompressionError::from_err) -} + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::snappy::decompress[input, output]).map_err(DecompressionError::from_err) + } -/// Get the expected max compressed length for snappy raw compression; this is the size -/// of buffer that should be passed to `compress_raw_into` -#[pyfunction] -pub fn compress_raw_max_len(data: BytesType) -> usize { - libcramjam::snappy::snap::raw::max_compress_len(data.len()) -} + /// Compress raw format directly into an output buffer + #[pyfunction] + pub fn compress_raw_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + let bytes_in = input.as_bytes(); + let bytes_out = output.as_bytes_mut()?; + py.allow_threads(|| libcramjam::snappy::raw::compress(bytes_in, bytes_out)) + .map_err(CompressionError::from_err) + } -/// Get the decompressed length for the given data. 
This is the size of buffer -/// that should be passed to `decompress_raw_into` -#[pyfunction] -pub fn decompress_raw_len(data: BytesType) -> PyResult { - libcramjam::snappy::snap::raw::decompress_len(data.as_bytes()).map_err(DecompressionError::from_err) -} + /// Decompress raw format directly into an output buffer + #[pyfunction] + pub fn decompress_raw_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + let bytes_in = input.as_bytes(); + let bytes_out = output.as_bytes_mut()?; + py.allow_threads(|| libcramjam::snappy::raw::decompress(bytes_in, bytes_out)) + .map_err(DecompressionError::from_err) + } -/// Snappy Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + /// Get the expected max compressed length for snappy raw compression; this is the size + /// of buffer that should be passed to `compress_raw_into` + #[pyfunction] + pub fn compress_raw_max_len(data: BytesType) -> usize { + libcramjam::snappy::snap::raw::max_compress_len(data.len()) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. - #[new] - pub fn __init__() -> PyResult { - let inner = libcramjam::snappy::snap::write::FrameEncoder::new(Cursor::new(vec![])); - Ok(Self { inner: Some(inner) }) + /// Get the decompressed length for the given data. This is the size of buffer + /// that should be passed to `decompress_raw_into` + #[pyfunction] + pub fn decompress_raw_len(data: BytesType) -> PyResult { + libcramjam::snappy::snap::raw::decompress_len(data.as_bytes()).map_err(DecompressionError::from_err) } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// Snappy Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__() -> PyResult { + let inner = libcramjam::snappy::snap::write::FrameEncoder::new(Cursor::new(vec![])); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.into_inner().map(|c| c.into_inner())) + } } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. - pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| inner.into_inner().map(|c| c.into_inner())) + mod _decompressor { + use super::*; + crate::make_decompressor!(snappy); } + #[pymodule_export] + use _decompressor::Decompressor; } - -crate::make_decompressor!(snappy); diff --git a/src/xz.rs b/src/xz.rs index 35475e82..6ac8a17f 100644 --- a/src/xz.rs +++ b/src/xz.rs @@ -1,385 +1,384 @@ //! 
xz and lzma de/compression interface use pyo3::prelude::*; -use pyo3::PyResult; -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::{AsBytes, RustyBuffer}; -use crate::BytesType; -use pyo3::exceptions::PyNotImplementedError; -use pyo3::wrap_pyfunction; -use std::io::Cursor; +/// xz and lzma de/compression interface +#[pymodule] +pub mod xz { -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} + use pyo3::prelude::*; + use pyo3::PyResult; -/// LZMA compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> _ = cramjam.xz.compress(b'some bytes here') -/// >>> # Defaults to XZ format, you can use the deprecated LZMA format like this: -/// >>> _ = cramjam.xz.compress(b'some bytes here', format=cramjam.xz.Format.ALONE) -/// ``` -#[pyfunction] -pub fn compress( - py: Python, - data: BytesType, - preset: Option, - format: Option, - check: Option, - filters: Option, - options: Option, - output_len: Option, -) -> PyResult { - crate::generic!( - py, - libcramjam::xz::compress[data], - output_len = output_len, - preset, - format, - check, - filters, - options - ) - .map_err(CompressionError::from_err) -} + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use pyo3::exceptions::PyNotImplementedError; + use std::io::Cursor; -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into( - py: Python, - input: BytesType, - mut output: BytesType, - preset: Option, - format: Option, - check: Option, - filters: Option, - options: Option, -) -> PyResult { - crate::generic!(py, libcramjam::xz::compress[input, output], preset, format, check, filters, options) + /// LZMA compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> _ = cramjam.xz.compress(b'some bytes here') + /// >>> # Defaults to XZ format, you can use the deprecated LZMA format like this: + /// >>> _ = cramjam.xz.compress(b'some bytes here', format=cramjam.xz.Format.ALONE) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, preset=None, format=None, check=None, filters=None, options=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + preset: Option, + format: Option, + check: Option, + filters: Option, + options: Option, + output_len: Option, + ) -> PyResult { + crate::generic!( + py, + libcramjam::xz::compress[data], + output_len = output_len, + preset, + format, + check, + filters, + options + ) .map_err(CompressionError::from_err) -} - -/// LZMA decompression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> # bytes or bytearray; bytearray is faster -/// >>> cramjam.xz.decompress(compressed_bytes, output_len=Optional[None]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::xz::decompress[data], output_len = output_len).map_err(DecompressionError::from_err) -} - -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { - crate::generic!(py, libcramjam::xz::decompress[input, output]).map_err(DecompressionError::from_err) -} -/// XZ Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} - -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. - #[new] - pub fn __init__(preset: Option) -> PyResult { - let preset = preset.unwrap_or(5); - let inner = libcramjam::xz::xz2::write::XzEncoder::new(Cursor::new(vec![]), preset); - Ok(Self { inner: Some(inner) }) } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, preset=None, format=None, check=None, filters=None, options=None))] + pub fn compress_into( + py: Python, + input: BytesType, + mut output: BytesType, + preset: Option, + format: Option, + check: Option, + filters: Option, + options: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::xz::compress[input, output], preset, format, check, filters, options) + .map_err(CompressionError::from_err) } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - Err(PyNotImplementedError::new_err( - "`.flush` for XZ/LZMA not implemented, just use `.finish()` instead when your done.", - )) + /// LZMA decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> # bytes or bytearray; bytearray is faster + /// >>> cramjam.xz.decompress(compressed_bytes, output_len=Optional[None]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::xz::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) } - /// Consume the current compressor state and return the compressed stream - /// **NB** The compressor will not be usable after this method is called. 
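A minimal sketch of the one-shot xz calls above; the `.xz` container is the default and the deprecated `.lzma` container is selected through the `Format` enum, as the compress docstring shows. The sample data and preset are arbitrary.

```python
import cramjam

data = b"some bytes here" * 100

compressed = cramjam.xz.compress(data, preset=6)        # .xz container by default
assert bytes(cramjam.xz.decompress(compressed)) == data

# the legacy .lzma ("alone") container
legacy = cramjam.xz.compress(data, format=cramjam.xz.Format.ALONE)
```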
- pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::xz::decompress[input, output]).map_err(DecompressionError::from_err) } -} - -crate::make_decompressor!(xz); - -/// Available Filter IDs -#[derive(Clone, Debug)] -#[pyclass] -#[allow(missing_docs)] -pub enum Filter { - Arm, - ArmThumb, - Ia64, - Lzma1, - Lzma2, - PowerPC, - Sparc, - X86, -} -impl Default for Filter { - fn default() -> Self { - Self::Lzma2 + /// XZ Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } -} -/// MatchFinder, used with Options.mf attribute -#[derive(Clone, Debug)] -#[pyclass] -#[allow(missing_docs)] -pub enum MatchFinder { - HashChain3, - HashChain4, - BinaryTree2, - BinaryTree3, - BinaryTree4, -} + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (preset=None))] + pub fn __init__(preset: Option) -> PyResult { + let preset = preset.unwrap_or(5); + let inner = libcramjam::xz::xz2::write::XzEncoder::new(Cursor::new(vec![]), preset); + Ok(Self { inner: Some(inner) }) + } -impl Into for MatchFinder { - fn into(self) -> libcramjam::xz::MatchFinder { - match self { - Self::HashChain3 => libcramjam::xz::MatchFinder::HashChain3, - Self::HashChain4 => libcramjam::xz::MatchFinder::HashChain4, - Self::BinaryTree2 => libcramjam::xz::MatchFinder::BinaryTree2, - Self::BinaryTree3 => libcramjam::xz::MatchFinder::BinaryTree3, - Self::BinaryTree4 => libcramjam::xz::MatchFinder::BinaryTree4, + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) } - } -} -/// MatchFinder, used with Options.mode attribute -#[derive(Clone, Debug)] -#[pyclass] -#[allow(missing_docs)] -pub enum Mode { - Fast, - Normal, -} -impl Into for Mode { - fn into(self) -> libcramjam::xz::Mode { - match self { - Self::Fast => libcramjam::xz::Mode::Fast, - Self::Normal => libcramjam::xz::Mode::Normal, + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + Err(PyNotImplementedError::new_err( + "`.flush` for XZ/LZMA not implemented, just use `.finish()` instead when your done.", + )) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) } } -} -/// FilterChain, similar to the default Python XZ filter chain which is a list of -/// dicts. 
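A usage sketch of the streaming xz `Compressor` above; note that `.flush()` deliberately raises `NotImplementedError`, so the stream is finalized with `.finish()`. The chunked input is illustrative.

```python
import cramjam

c = cramjam.xz.Compressor(preset=5)   # preset defaults to 5 when omitted
c.compress(b"first chunk ")
c.compress(b"second chunk")
# no incremental flush here: c.flush() raises NotImplementedError,
# so finalize with finish(), which consumes the compressor
stream = bytes(c.finish())
assert bytes(cramjam.xz.decompress(stream)) == b"first chunk second chunk"
```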
-#[derive(Debug, Clone)] -#[pyclass] -pub struct FilterChain(Vec); + mod _decompressor { + use super::*; + crate::make_decompressor!(xz); + } + #[pymodule_export] + use _decompressor::Decompressor; -#[pymethods] -#[allow(missing_docs)] -impl FilterChain { - #[new] - pub fn __init__() -> Self { - Self(vec![]) + /// Available Filter IDs + #[derive(Clone, Debug, PartialEq)] + #[pyclass(eq, eq_int)] + #[allow(missing_docs)] + pub enum Filter { + Arm, + ArmThumb, + Ia64, + Lzma1, + Lzma2, + PowerPC, + Sparc, + X86, } - pub fn append_filter(&mut self, filter_chain_item: FilterChainItem) { - self.0.push(filter_chain_item); + impl Default for Filter { + fn default() -> Self { + Self::Lzma2 + } } -} -impl Into for FilterChain { - fn into(self) -> libcramjam::xz::Filters { - let mut filters = libcramjam::xz::Filters::new(); - for filter in self.0 { - match filter.filter { - Filter::Lzma1 => filters.lzma1(&filter.try_into().unwrap()), - Filter::Lzma2 => filters.lzma2(&filter.try_into().unwrap()), - Filter::Arm => filters.arm(), - Filter::ArmThumb => filters.arm_thumb(), - Filter::Ia64 => filters.ia64(), - Filter::PowerPC => filters.powerpc(), - Filter::Sparc => filters.sparc(), - Filter::X86 => filters.x86(), - }; - } - filters + /// MatchFinder, used with Options.mf attribute + #[derive(Clone, Debug, PartialEq)] + #[pyclass(eq, eq_int)] + #[allow(missing_docs)] + pub enum MatchFinder { + HashChain3, + HashChain4, + BinaryTree2, + BinaryTree3, + BinaryTree4, } -} -/// FilterChainItem. In Python's lzma module, this represents a single dict in the -/// filter chain list. To be added to the `FilterChain` -#[derive(Clone, Debug, Default)] -#[pyclass] -pub struct FilterChainItem { - filter: Filter, - options: Options, -} + impl Into for MatchFinder { + fn into(self) -> libcramjam::xz::MatchFinder { + match self { + Self::HashChain3 => libcramjam::xz::MatchFinder::HashChain3, + Self::HashChain4 => libcramjam::xz::MatchFinder::HashChain4, + Self::BinaryTree2 => libcramjam::xz::MatchFinder::BinaryTree2, + Self::BinaryTree3 => libcramjam::xz::MatchFinder::BinaryTree3, + Self::BinaryTree4 => libcramjam::xz::MatchFinder::BinaryTree4, + } + } + } -#[pymethods] -impl FilterChainItem { - #[new] + /// MatchFinder, used with Options.mode attribute + #[derive(Clone, Debug, PartialEq)] + #[pyclass(eq, eq_int)] #[allow(missing_docs)] - pub fn __init__(filter: Filter, options: Option) -> Self { - Self { - filter, - options: options.unwrap_or_default(), + pub enum Mode { + Fast, + Normal, + } + impl Into for Mode { + fn into(self) -> libcramjam::xz::Mode { + match self { + Self::Fast => libcramjam::xz::Mode::Fast, + Self::Normal => libcramjam::xz::Mode::Normal, + } } } -} -/// -#[derive(Clone, Debug, Default)] -#[pyclass] -pub struct Options { - preset: Option, - dict_size: Option, - lc: Option, - lp: Option, - pb: Option, - mode: Option, - nice_len: Option, - mf: Option, - depth: Option, -} + /// FilterChain, similar to the default Python XZ filter chain which is a list of + /// dicts. 
+ #[derive(Debug, Clone)] + #[pyclass] + pub struct FilterChain(Vec); -impl Into for FilterChainItem { - fn into(self) -> libcramjam::xz::LzmaOptions { - self.options.into() + #[pymethods] + #[allow(missing_docs)] + impl FilterChain { + #[new] + pub fn __init__() -> Self { + Self(vec![]) + } + pub fn append_filter(&mut self, filter_chain_item: FilterChainItem) { + self.0.push(filter_chain_item); + } } -} -impl Into for Options { - fn into(self) -> libcramjam::xz::LzmaOptions { - let mut opts = libcramjam::xz::LzmaOptions::new_preset(self.preset.unwrap_or(6)).unwrap(); - self.dict_size.map(|dict_size| opts.dict_size(dict_size)); - self.lc.map(|lc| opts.literal_context_bits(lc)); - self.lp.map(|lp| opts.literal_position_bits(lp)); - self.pb.map(|pb| opts.position_bits(pb)); - self.mode.map(|mode| opts.mode(mode.into())); - self.nice_len.map(|nice_len| opts.nice_len(nice_len as _)); - self.mf.map(|mf| opts.match_finder(mf.into())); - self.depth.map(|depth| opts.depth(depth as _)); - opts + impl Into for FilterChain { + fn into(self) -> libcramjam::xz::Filters { + let mut filters = libcramjam::xz::Filters::new(); + for filter in self.0 { + match filter.filter { + Filter::Lzma1 => filters.lzma1(&filter.try_into().unwrap()), + Filter::Lzma2 => filters.lzma2(&filter.try_into().unwrap()), + Filter::Arm => filters.arm(), + Filter::ArmThumb => filters.arm_thumb(), + Filter::Ia64 => filters.ia64(), + Filter::PowerPC => filters.powerpc(), + Filter::Sparc => filters.sparc(), + Filter::X86 => filters.x86(), + }; + } + filters + } } -} -#[pymethods] -#[allow(missing_docs)] -impl Options { - #[new] - pub fn __init__() -> Self { - Self::default() - } - pub fn set_preset(&mut self, preset: u32) -> Self { - self.preset = Some(preset); - self.clone() + /// FilterChainItem. In Python's lzma module, this represents a single dict in the + /// filter chain list. 
To be added to the `FilterChain` + #[derive(Clone, Debug, Default)] + #[pyclass] + pub struct FilterChainItem { + filter: Filter, + options: Options, } - pub fn set_dict_size(&mut self, dict_size: u32) -> Self { - self.dict_size = Some(dict_size); - self.clone() - } - pub fn set_lc(&mut self, lc: u32) -> Self { - self.lc = Some(lc); - self.clone() - } - pub fn set_lp(&mut self, lp: u32) -> Self { - self.lp = Some(lp); - self.clone() - } - pub fn set_pb(&mut self, pb: u32) -> Self { - self.pb = Some(pb); - self.clone() + + #[pymethods] + impl FilterChainItem { + #[new] + #[allow(missing_docs)] + #[pyo3(signature = (filter, options=None))] + pub fn __init__(filter: Filter, options: Option) -> Self { + Self { + filter, + options: options.unwrap_or_default(), + } + } } - pub fn set_mode(&mut self, mode: Mode) -> Self { - self.mode = Some(mode); - self.clone() + + /// + #[derive(Clone, Debug, Default)] + #[pyclass] + pub struct Options { + preset: Option, + dict_size: Option, + lc: Option, + lp: Option, + pb: Option, + mode: Option, + nice_len: Option, + mf: Option, + depth: Option, } - pub fn set_nice_len(&mut self, nice_len: usize) -> Self { - self.nice_len = Some(nice_len); - self.clone() + + impl Into for FilterChainItem { + fn into(self) -> libcramjam::xz::LzmaOptions { + self.options.into() + } } - pub fn set_mf(&mut self, mf: MatchFinder) -> Self { - self.mf = Some(mf); - self.clone() + + impl Into for Options { + fn into(self) -> libcramjam::xz::LzmaOptions { + let mut opts = libcramjam::xz::LzmaOptions::new_preset(self.preset.unwrap_or(6)).unwrap(); + self.dict_size.map(|dict_size| opts.dict_size(dict_size)); + self.lc.map(|lc| opts.literal_context_bits(lc)); + self.lp.map(|lp| opts.literal_position_bits(lp)); + self.pb.map(|pb| opts.position_bits(pb)); + self.mode.map(|mode| opts.mode(mode.into())); + self.nice_len.map(|nice_len| opts.nice_len(nice_len as _)); + self.mf.map(|mf| opts.match_finder(mf.into())); + self.depth.map(|depth| opts.depth(depth as _)); + opts + } } - pub fn set_depth(&mut self, depth: usize) -> Self { - self.depth = Some(depth); - self.clone() + + #[pymethods] + #[allow(missing_docs)] + impl Options { + #[new] + pub fn __init__() -> Self { + Self::default() + } + pub fn set_preset(&mut self, preset: u32) -> Self { + self.preset = Some(preset); + self.clone() + } + pub fn set_dict_size(&mut self, dict_size: u32) -> Self { + self.dict_size = Some(dict_size); + self.clone() + } + pub fn set_lc(&mut self, lc: u32) -> Self { + self.lc = Some(lc); + self.clone() + } + pub fn set_lp(&mut self, lp: u32) -> Self { + self.lp = Some(lp); + self.clone() + } + pub fn set_pb(&mut self, pb: u32) -> Self { + self.pb = Some(pb); + self.clone() + } + pub fn set_mode(&mut self, mode: Mode) -> Self { + self.mode = Some(mode); + self.clone() + } + pub fn set_nice_len(&mut self, nice_len: usize) -> Self { + self.nice_len = Some(nice_len); + self.clone() + } + pub fn set_mf(&mut self, mf: MatchFinder) -> Self { + self.mf = Some(mf); + self.clone() + } + pub fn set_depth(&mut self, depth: usize) -> Self { + self.depth = Some(depth); + self.clone() + } } -} -/// Possible formats -#[derive(Clone, Debug)] -#[pyclass] -pub enum Format { - /// Auto select the format, for compression this is XZ, - /// for decompression it will be determined by the compressed input. - AUTO, - /// The `.xz` format (default) - XZ, - /// Legacy `.lzma` format. 
- ALONE, - /// Raw data stream - RAW, -} + /// Possible formats + #[derive(Clone, Debug, PartialEq)] + #[pyclass(eq, eq_int)] + pub enum Format { + /// Auto select the format, for compression this is XZ, + /// for decompression it will be determined by the compressed input. + AUTO, + /// The `.xz` format (default) + XZ, + /// Legacy `.lzma` format. + ALONE, + /// Raw data stream + RAW, + } -impl Default for Format { - fn default() -> Self { - Format::XZ + impl Default for Format { + fn default() -> Self { + Format::XZ + } } -} -impl Into for Format { - fn into(self) -> libcramjam::xz::Format { - match self { - Self::AUTO => libcramjam::xz::Format::AUTO, - Self::XZ => libcramjam::xz::Format::XZ, - Self::ALONE => libcramjam::xz::Format::ALONE, - Self::RAW => libcramjam::xz::Format::RAW, + impl Into for Format { + fn into(self) -> libcramjam::xz::Format { + match self { + Self::AUTO => libcramjam::xz::Format::AUTO, + Self::XZ => libcramjam::xz::Format::XZ, + Self::ALONE => libcramjam::xz::Format::ALONE, + Self::RAW => libcramjam::xz::Format::RAW, + } } } -} -/// Possible Check configurations -#[derive(Debug, Clone)] -#[pyclass] -#[allow(missing_docs)] -pub enum Check { - Crc64, - Crc32, - Sha256, - None, -} + /// Possible Check configurations + #[derive(Debug, Clone, PartialEq)] + #[pyclass(eq, eq_int)] + #[allow(missing_docs)] + pub enum Check { + Crc64, + Crc32, + Sha256, + None, + } -impl Into for Check { - fn into(self) -> libcramjam::xz::Check { - match self { - Self::Crc64 => libcramjam::xz::Check::Crc64, - Self::Crc32 => libcramjam::xz::Check::Crc32, - Self::Sha256 => libcramjam::xz::Check::Sha256, - Self::None => libcramjam::xz::Check::None, + impl Into for Check { + fn into(self) -> libcramjam::xz::Check { + match self { + Self::Crc64 => libcramjam::xz::Check::Crc64, + Self::Crc32 => libcramjam::xz::Check::Crc32, + Self::Sha256 => libcramjam::xz::Check::Sha256, + Self::None => libcramjam::xz::Check::None, + } } } } diff --git a/src/zstd.rs b/src/zstd.rs index 31bec17e..763382e5 100644 --- a/src/zstd.rs +++ b/src/zstd.rs @@ -1,95 +1,104 @@ //! zstd de/compression interface -use crate::exceptions::{CompressionError, DecompressionError}; -use crate::io::RustyBuffer; -use crate::{AsBytes, BytesType}; use pyo3::prelude::*; -use pyo3::wrap_pyfunction; -use pyo3::PyResult; -use std::io::Cursor; -const DEFAULT_COMPRESSION_LEVEL: i32 = 0; +/// zstd de/compression interface +#[pymodule] +pub mod zstd { + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::RustyBuffer; + use crate::{AsBytes, BytesType}; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; -pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(compress, m)?)?; - m.add_function(wrap_pyfunction!(decompress, m)?)?; - m.add_function(wrap_pyfunction!(compress_into, m)?)?; - m.add_function(wrap_pyfunction!(decompress_into, m)?)?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -/// ZSTD decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.zstd.decompress(compressed_bytes, output_len=Optional[int]) -/// ``` -#[pyfunction] -pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::zstd::decompress[data], output_len = output_len) - .map_err(DecompressionError::from_err) -} - -/// ZSTD compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> cramjam.zstd.compress(b'some bytes here', level=0, output_len=Optional[int]) # level defaults to 11 -/// ``` -#[pyfunction] -pub fn compress(py: Python, data: BytesType, level: Option, output_len: Option) -> PyResult { - crate::generic!(py, libcramjam::zstd::compress[data], output_len = output_len, level) - .map_err(CompressionError::from_err) -} + const DEFAULT_COMPRESSION_LEVEL: i32 = 0; -/// Compress directly into an output buffer -#[pyfunction] -pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { - crate::generic!(py, libcramjam::zstd::compress[input, output], level).map_err(CompressionError::from_err) -} + /// ZSTD decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.zstd.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::zstd::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } -/// Decompress directly into an output buffer -#[pyfunction] -pub fn decompress_into<'a>(py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult { - crate::generic!(py, libcramjam::zstd::decompress[input, output]).map_err(DecompressionError::from_err) -} + /// ZSTD compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.zstd.compress(b'some bytes here', level=0, output_len=Optional[int]) # level defaults to 11 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::zstd::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } -/// ZSTD Compressor object for streaming compression -#[pyclass] -pub struct Compressor { - inner: Option>>>, -} + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::zstd::compress[input, output], level).map_err(CompressionError::from_err) + } -#[pymethods] -impl Compressor { - /// Initialize a new `Compressor` instance. - #[new] - pub fn __init__(level: Option) -> PyResult { - let inner = libcramjam::zstd::zstd::stream::write::Encoder::new( - Cursor::new(vec![]), - level.unwrap_or(DEFAULT_COMPRESSION_LEVEL), - )?; - Ok(Self { inner: Some(inner) }) + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into<'a>(py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult { + crate::generic!(py, libcramjam::zstd::decompress[input, output]).map_err(DecompressionError::from_err) } - /// Compress input into the current compressor's stream. - pub fn compress(&mut self, input: &[u8]) -> PyResult { - crate::io::stream_compress(&mut self.inner, input) + /// ZSTD Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, } - /// Flush and return current compressed stream - pub fn flush(&mut self) -> PyResult { - crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. 
+        #[new]
+        #[pyo3(signature = (level=None))]
+        pub fn __init__(level: Option<i32>) -> PyResult<Self> {
+            let inner = libcramjam::zstd::zstd::stream::write::Encoder::new(
+                Cursor::new(vec![]),
+                level.unwrap_or(DEFAULT_COMPRESSION_LEVEL),
+            )?;
+            Ok(Self { inner: Some(inner) })
+        }
+
+        /// Compress input into the current compressor's stream.
+        pub fn compress(&mut self, input: &[u8]) -> PyResult<usize> {
+            crate::io::stream_compress(&mut self.inner, input)
+        }
+
+        /// Flush and return current compressed stream
+        pub fn flush(&mut self) -> PyResult<RustyBuffer> {
+            crate::io::stream_flush(&mut self.inner, |e| e.get_mut())
+        }
+
+        /// Consume the current compressor state and return the compressed stream
+        /// **NB** The compressor will not be usable after this method is called.
+        pub fn finish(&mut self) -> PyResult<RustyBuffer> {
+            crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|v| v.into_inner()))
+        }
     }

-    /// Consume the current compressor state and return the compressed stream
-    /// **NB** The compressor will not be usable after this method is called.
-    pub fn finish(&mut self) -> PyResult<RustyBuffer> {
-        crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|v| v.into_inner()))
+    mod _decompressor {
+        use super::*;
+        crate::make_decompressor!(zstd);
     }
+    #[pymodule_export]
+    use _decompressor::Decompressor;
 }
-
-crate::make_decompressor!(zstd);
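
The old `init_py_module(m: &PyModule)` registration helpers are dropped in favour of PyO3's declarative module syntax: a `#[pymodule]` placed on a `mod` registers the `#[pyfunction]`/`#[pyclass]` items declared inside it, and `#[pymodule_export]` re-exports items defined elsewhere (here, the macro-generated `Decompressor`). A minimal sketch of the style, using made-up names rather than anything from this patch:

```rust
use pyo3::prelude::*;

// Illustrative only: a declarative PyO3 module. Items annotated inside the
// `mod` are registered automatically, so no manual `add_function`/`add_class`
// calls are needed.
#[pymodule]
mod example {
    use pyo3::prelude::*;

    /// Exposed to Python as `example.ping()`.
    #[pyfunction]
    fn ping() -> String {
        "pong".to_string()
    }

    // A class defined in a private Rust submodule...
    mod _types {
        use pyo3::prelude::*;

        #[pyclass]
        pub struct Thing;
    }
    // ...and re-exported into the Python module, mirroring how the
    // `Decompressor` generated by `make_decompressor!` is exported above.
    #[pymodule_export]
    use _types::Thing;
}
```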
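The new `#[pyo3(signature = (...))]` attributes are there because PyO3 0.22 deprecates the implicit `None` default for trailing `Option<T>` arguments; the Python-side defaults now have to be spelled out. A minimal, hypothetical sketch of the pattern (the function below is not part of cramjam):

```rust
use pyo3::prelude::*;

/// Hypothetical example: without the explicit signature, PyO3 0.22 emits a
/// deprecation warning about the implicit `None` default for `level`.
#[pyfunction]
#[pyo3(signature = (data, level=None))]
fn compress_len(data: &[u8], level: Option<i32>) -> usize {
    // Placeholder body: ignore `level` and just report the input length.
    let _ = level;
    data.len()
}
```

From Python this keeps accepting `compress_len(b"...")`, `compress_len(b"...", 3)`, and `compress_len(b"...", level=3)`.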
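Likewise, the fieldless enums (`Filter`, `MatchFinder`, `Mode`, `Format`, `Check`) now derive `PartialEq` and are marked `#[pyclass(eq, eq_int)]`: PyO3 0.22 deprecates the implicit equality and integer comparison that simple enums used to get, so the behaviour is requested explicitly. Roughly, with an illustrative enum rather than one from this patch:

```rust
use pyo3::prelude::*;

// Illustrative only. `eq` generates `__eq__` from the `PartialEq` impl, and
// `eq_int` additionally lets Python compare variants against their integer
// discriminants (e.g. `Speed.Slow == 0`).
#[derive(Clone, Debug, PartialEq)]
#[pyclass(eq, eq_int)]
enum Speed {
    Slow, // 0 on the Python side
    Fast, // 1 on the Python side
}
```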