Skip to content

Commit

Permalink
Switch brotli2 -> brotli crate (#87)
Browse files Browse the repository at this point in the history
Fixes #86
  • Loading branch information
milesgranger authored Oct 29, 2022
1 parent 29d9e3b commit f6570b9
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 53 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ jobs:
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Audit
run: cargo install cargo-audit && cargo audit
- name: Build Wheels
uses: messense/maturin-action@v1
with:
Expand Down
40 changes: 28 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
[package]
name = "cramjam"
version = "2.6.0"
version = "2.6.1"
authors = ["Miles Granger <[email protected]>"]
edition = "2018"
license = "MIT"
license-file = "LICENSE"
description = "Thin Python bindings to de/compression algorithms in Rust"
readme = "README.md"
Expand All @@ -25,7 +24,7 @@ opt-level = 3
[dependencies]
pyo3 = { version = "0.16", default-features = false, features = ["macros"] }
snap = "^1"
brotli2 = "^0.3"
brotli = { version = "^3", default-features = false, features = ["std"] }
bzip2 = "^0.4"
lz4 = "^1"
flate2 = "^1"
Expand Down
2 changes: 1 addition & 1 deletion benchmark-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pytest-benchmark==3.2.3
pytest-benchmark==4.0.0
python-snappy==0.5.4
lz4==3.1.0
brotlipy==0.7.0
Expand Down
56 changes: 28 additions & 28 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,34 +293,34 @@ test_lz4_block[urls.10K-python-lz4] 2,001.9611 (66.88)
`make bench-brotli`

```bash
----------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------
Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 17.8897 (1.0) 21.5322 (1.05) 18.4333 (1.0) 0.6208 (1.44) 18.2443 (1.0) 0.4319 (1.0) 5;5 54.2498 (1.0) 50 1
test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 18.1533 (1.01) 20.4369 (1.0) 18.7759 (1.02) 0.5898 (1.37) 18.6078 (1.02) 0.8347 (1.93) 10;1 53.2598 (0.98) 50 1
test_brotli[alice29.txt-brotli] 220.3170 (12.32) 230.1574 (11.26) 224.9658 (12.20) 3.8282 (8.88) 223.9629 (12.28) 5.6323 (13.04) 2;0 4.4451 (0.08) 5 1
test_brotli[alice29.txt-cramjam] 226.4241 (12.66) 228.6188 (11.19) 227.6423 (12.35) 0.9194 (2.13) 227.9714 (12.50) 1.5160 (3.51) 2;0 4.3929 (0.08) 5 1
test_brotli[asyoulik.txt-brotli] 176.8532 (9.89) 178.5816 (8.74) 177.9033 (9.65) 0.6775 (1.57) 177.9574 (9.75) 0.9254 (2.14) 2;0 5.6210 (0.10) 6 1
test_brotli[asyoulik.txt-cramjam] 181.3138 (10.14) 185.9610 (9.10) 183.0713 (9.93) 1.8366 (4.26) 182.5439 (10.01) 2.9700 (6.88) 1;0 5.4624 (0.10) 6 1
test_brotli[fireworks.jpeg-brotli] 70.7069 (3.95) 75.6588 (3.70) 71.9962 (3.91) 1.1901 (2.76) 71.7863 (3.93) 0.6675 (1.55) 2;1 13.8896 (0.26) 14 1
test_brotli[fireworks.jpeg-cramjam] 71.3536 (3.99) 73.7503 (3.61) 71.8630 (3.90) 0.6269 (1.45) 71.7001 (3.93) 0.5713 (1.32) 1;1 13.9154 (0.26) 14 1
test_brotli[geo.protodata-brotli] 124.7704 (6.97) 126.1844 (6.17) 125.5772 (6.81) 0.4313 (1.0) 125.5776 (6.88) 0.4719 (1.09) 2;0 7.9632 (0.15) 8 1
test_brotli[geo.protodata-cramjam] 128.6971 (7.19) 130.4920 (6.39) 129.6153 (7.03) 0.6275 (1.45) 129.5793 (7.10) 1.0149 (2.35) 4;0 7.7151 (0.14) 8 1
test_brotli[html-brotli] 133.0865 (7.44) 137.1792 (6.71) 134.8985 (7.32) 1.1569 (2.68) 134.9101 (7.39) 0.7996 (1.85) 2;2 7.4130 (0.14) 8 1
test_brotli[html-cramjam] 136.2971 (7.62) 141.6241 (6.93) 138.3340 (7.50) 2.2464 (5.21) 137.6318 (7.54) 3.8429 (8.90) 2;0 7.2289 (0.13) 8 1
test_brotli[html_x_4-brotli] 162.2292 (9.07) 164.1336 (8.03) 163.3756 (8.86) 0.7556 (1.75) 163.5865 (8.97) 1.3218 (3.06) 2;0 6.1209 (0.11) 6 1
test_brotli[html_x_4-cramjam] 166.7431 (9.32) 168.3913 (8.24) 167.3817 (9.08) 0.6918 (1.60) 167.2347 (9.17) 1.1951 (2.77) 1;0 5.9744 (0.11) 6 1
test_brotli[kppkn.gtb-brotli] 416.0493 (23.26) 420.2574 (20.56) 417.5143 (22.65) 1.6582 (3.84) 417.0724 (22.86) 1.9902 (4.61) 1;0 2.3951 (0.04) 5 1
test_brotli[kppkn.gtb-cramjam] 432.7377 (24.19) 438.8289 (21.47) 434.3502 (23.56) 2.5329 (5.87) 433.5837 (23.77) 2.0103 (4.65) 1;1 2.3023 (0.04) 5 1
test_brotli[lcet10.txt-brotli] 689.3127 (38.53) 695.1439 (34.01) 691.5692 (37.52) 2.2512 (5.22) 691.0756 (37.88) 2.8880 (6.69) 2;0 1.4460 (0.03) 5 1
test_brotli[lcet10.txt-cramjam] 706.3514 (39.48) 730.3781 (35.74) 713.2318 (38.69) 9.7223 (22.54) 709.3515 (38.88) 7.2208 (16.72) 1;1 1.4021 (0.03) 5 1
test_brotli[paper-100k.pdf-brotli] 397.2979 (22.21) 399.6303 (19.55) 398.6496 (21.63) 0.9994 (2.32) 398.5795 (21.85) 1.6975 (3.93) 1;0 2.5085 (0.05) 5 1
test_brotli[paper-100k.pdf-cramjam] 397.1881 (22.20) 487.4731 (23.85) 418.1530 (22.68) 38.8914 (90.17) 401.4564 (22.00) 27.9225 (64.65) 1;1 2.3915 (0.04) 5 1
test_brotli[plrabn12.txt-brotli] 766.4343 (42.84) 965.1578 (47.23) 822.5129 (44.62) 81.3593 (188.62) 785.6867 (43.06) 70.6752 (163.63) 1;1 1.2158 (0.02) 5 1
test_brotli[plrabn12.txt-cramjam] 776.7893 (43.42) 791.7569 (38.74) 780.8096 (42.36) 6.4344 (14.92) 777.1387 (42.60) 7.2310 (16.74) 1;0 1.2807 (0.02) 5 1
test_brotli[urls.10K-brotli] 1,232.4364 (68.89) 1,281.6436 (62.71) 1,251.4702 (67.89) 21.9635 (50.92) 1,239.3781 (67.93) 36.2594 (83.95) 1;0 0.7991 (0.01) 5 1
test_brotli[urls.10K-cramjam] 1,256.7332 (70.25) 1,348.1630 (65.97) 1,284.4820 (69.68) 36.9337 (85.63) 1,274.7599 (69.87) 37.5966 (87.05) 1;0 0.7785 (0.01) 5 1
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------- benchmark: 24 tests --------------------------------------------------------------------------------------------------
Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 10.9184 (1.0) 14.6035 (1.0) 12.3503 (1.0) 1.1312 (1.74) 11.8966 (1.0) 2.1789 (5.62) 30;0 80.9697 (1.0) 82 1
test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 14.0977 (1.29) 17.0087 (1.16) 14.5718 (1.18) 0.6504 (1.0) 14.3074 (1.20) 0.3876 (1.0) 5;5 68.6259 (0.85) 47 1
test_brotli[alice29.txt-brotli] 141.2160 (12.93) 145.2801 (9.95) 142.9074 (11.57) 1.4637 (2.25) 142.5277 (11.98) 2.1899 (5.65) 2;0 6.9975 (0.09) 7 1
test_brotli[alice29.txt-cramjam] 152.6566 (13.98) 158.3756 (10.85) 154.4223 (12.50) 1.9262 (2.96) 154.1212 (12.96) 1.7533 (4.52) 1;1 6.4757 (0.08) 7 1
test_brotli[asyoulik.txt-brotli] 110.8312 (10.15) 116.3864 (7.97) 113.1018 (9.16) 1.9668 (3.02) 112.1098 (9.42) 3.2754 (8.45) 2;0 8.8416 (0.11) 9 1
test_brotli[asyoulik.txt-cramjam] 120.2650 (11.01) 142.7060 (9.77) 130.4795 (10.56) 9.0843 (13.97) 129.5092 (10.89) 16.0443 (41.40) 3;0 7.6640 (0.09) 9 1
test_brotli[fireworks.jpeg-brotli] 35.0424 (3.21) 52.9552 (3.63) 42.1037 (3.41) 6.8887 (10.59) 37.3856 (3.14) 12.9743 (33.48) 8;0 23.7509 (0.29) 23 1
test_brotli[fireworks.jpeg-cramjam] 45.6972 (4.19) 51.4216 (3.52) 47.9267 (3.88) 1.4601 (2.25) 48.0562 (4.04) 2.0533 (5.30) 7;0 20.8652 (0.26) 22 1
test_brotli[geo.protodata-brotli] 70.8891 (6.49) 73.5486 (5.04) 72.2200 (5.85) 0.8960 (1.38) 72.7037 (6.11) 1.5739 (4.06) 5;0 13.8466 (0.17) 14 1
test_brotli[geo.protodata-cramjam] 91.4206 (8.37) 94.7283 (6.49) 93.2170 (7.55) 0.9418 (1.45) 93.4534 (7.86) 1.1083 (2.86) 4;0 10.7277 (0.13) 11 1
test_brotli[html-brotli] 74.1237 (6.79) 76.8266 (5.26) 75.6515 (6.13) 0.9074 (1.40) 75.8785 (6.38) 1.3270 (3.42) 5;0 13.2185 (0.16) 13 1
test_brotli[html-cramjam] 102.4915 (9.39) 125.3486 (8.58) 112.2085 (9.09) 9.9984 (15.37) 106.6630 (8.97) 19.7193 (50.88) 3;0 8.9120 (0.11) 10 1
test_brotli[html_x_4-brotli] 87.5360 (8.02) 114.3479 (7.83) 97.7994 (7.92) 11.4060 (17.54) 90.7073 (7.62) 21.9963 (56.75) 3;0 10.2250 (0.13) 10 1
test_brotli[html_x_4-cramjam] 105.3706 (9.65) 111.3189 (7.62) 107.3742 (8.69) 1.9368 (2.98) 107.6139 (9.05) 2.9738 (7.67) 3;0 9.3132 (0.12) 10 1
test_brotli[kppkn.gtb-brotli] 271.9317 (24.91) 322.6200 (22.09) 284.3380 (23.02) 21.5044 (33.07) 276.2475 (23.22) 15.4194 (39.78) 1;1 3.5169 (0.04) 5 1
test_brotli[kppkn.gtb-cramjam] 323.5000 (29.63) 328.4267 (22.49) 324.7345 (26.29) 2.0891 (3.21) 324.0014 (27.23) 1.7883 (4.61) 1;1 3.0794 (0.04) 5 1
test_brotli[lcet10.txt-brotli] 442.0427 (40.49) 504.8650 (34.57) 462.6317 (37.46) 27.2407 (41.89) 446.6555 (37.54) 39.1690 (101.06) 1;0 2.1615 (0.03) 5 1
test_brotli[lcet10.txt-cramjam] 444.3975 (40.70) 511.8355 (35.05) 463.9231 (37.56) 27.3992 (42.13) 453.0632 (38.08) 24.6888 (63.70) 1;1 2.1555 (0.03) 5 1
test_brotli[paper-100k.pdf-brotli] 262.9858 (24.09) 307.7572 (21.07) 276.0070 (22.35) 18.9312 (29.11) 265.8558 (22.35) 22.6216 (58.37) 1;0 3.6231 (0.04) 5 1
test_brotli[paper-100k.pdf-cramjam] 91.9629 (8.42) 95.6179 (6.55) 93.8155 (7.60) 1.3233 (2.03) 93.5711 (7.87) 2.4300 (6.27) 5;0 10.6592 (0.13) 11 1
test_brotli[plrabn12.txt-brotli] 483.2054 (44.26) 497.8085 (34.09) 487.2513 (39.45) 6.0998 (9.38) 484.6980 (40.74) 6.3864 (16.48) 1;0 2.0523 (0.03) 5 1
test_brotli[plrabn12.txt-cramjam] 502.6484 (46.04) 530.4874 (36.33) 511.1903 (41.39) 11.2501 (17.30) 506.6601 (42.59) 11.8233 (30.51) 1;0 1.9562 (0.02) 5 1
test_brotli[urls.10K-brotli] 731.0115 (66.95) 808.1545 (55.34) 753.5258 (61.01) 32.1086 (49.37) 736.5432 (61.91) 35.3638 (91.24) 1;0 1.3271 (0.02) 5 1
test_brotli[urls.10K-cramjam] 722.6436 (66.19) 823.0498 (56.36) 743.6850 (60.22) 44.3756 (68.23) 724.4152 (60.89) 26.3356 (67.95) 1;1 1.3447 (0.02) 5 1
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```

#### Bzip2
Expand Down
19 changes: 11 additions & 8 deletions src/brotli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::PyResult;
use std::io::Cursor;
use std::io::{Cursor, Write};

const DEFAULT_COMPRESSION_LEVEL: u32 = 11;
const BUF_SIZE: usize = 1 << 17; // Taken from brotli kCompressFragmentTwoPassBlockSize
const LGWIN: u32 = 22;

pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
Expand Down Expand Up @@ -59,7 +61,7 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult<usiz
/// Brotli Compressor object for streaming compression
#[pyclass]
pub struct Compressor {
inner: Option<brotli2::write::BrotliEncoder<Cursor<Vec<u8>>>>,
inner: Option<brotli::CompressorWriter<Cursor<Vec<u8>>>>,
}

#[pymethods]
Expand All @@ -68,7 +70,7 @@ impl Compressor {
#[new]
pub fn __init__(level: Option<u32>) -> PyResult<Self> {
let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL);
let inner = brotli2::write::BrotliEncoder::new(Cursor::new(vec![]), level);
let inner = brotli::CompressorWriter::new(Cursor::new(vec![]), BUF_SIZE, level, LGWIN);
Ok(Self { inner: Some(inner) })
}

Expand All @@ -85,28 +87,29 @@ impl Compressor {
/// Consume the current compressor state and return the compressed stream
/// **NB** The compressor will not be usable after this method is called.
pub fn finish(&mut self) -> PyResult<RustyBuffer> {
crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner()))
crate::io::stream_finish(&mut self.inner, |mut inner| {
inner.flush().map(|_| inner.into_inner().into_inner())
})
}
}

pub(crate) mod internal {

use crate::brotli::DEFAULT_COMPRESSION_LEVEL;
use brotli2::read::{BrotliDecoder, BrotliEncoder};
use crate::brotli::{BUF_SIZE, DEFAULT_COMPRESSION_LEVEL, LGWIN};
use std::io::prelude::*;
use std::io::Error;

/// Decompress via Brotli
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = BrotliDecoder::new(input);
let mut decoder = brotli::Decompressor::new(input, BUF_SIZE);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress via Brotli
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL);
let mut encoder = BrotliEncoder::new(input, level);
let mut encoder = brotli::CompressorReader::new(input, BUF_SIZE, level, LGWIN);
let n_bytes = std::io::copy(&mut encoder, output)?;
Ok(n_bytes as usize)
}
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ mod tests {

test_variant!(snappy, compressed_len = 2572398,);
test_variant!(gzip, compressed_len = 157192, level = None);
test_variant!(brotli, compressed_len = 729, level = None);
test_variant!(brotli, compressed_len = 128, level = None);
test_variant!(bzip2, compressed_len = 14207, level = None);
test_variant!(deflate, compressed_len = 157174, level = None);
test_variant!(zstd, compressed_len = 4990, level = None);
Expand Down

0 comments on commit f6570b9

Please sign in to comment.