Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally support LZO #51

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ crate-type = ["cdylib"]
[features]
default = ["mimallocator", "extension-module"]
mimallocator = ["mimalloc"]
lzo = ["minilzo3"]
extension-module = ["pyo3/extension-module"]

[profile.release]
Expand All @@ -29,6 +30,7 @@ lz4 = "^1"
flate2 = "^1"
zstd = "0.6.1+zstd.1.4.9"
numpy = "0.13.0"
minilzo3 = { git = "https://github.com/milesgranger/minilzo3.git", optional = true, version = "0.1.0" }

[dependencies.mimalloc]
version = "0.1.24"
Expand Down
339 changes: 339 additions & 0 deletions LICENSE-GPL

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ bench-brotli:
bench-zstd:
$(BASE_BENCH_CMD) zstd

bench-lzo:
$(BASE_BENCH_CMD) lzo

dev-install:
rm -rf ./dist
maturin build --release --out dist --no-sdist --interpreter $(shell which python)
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ Available algorithms:
- [X] Gzip
- [X] Deflate
- [X] ZSTD
- [X] LZO (optional, due to GPL license)
- **Note:** If cramjam is installed or built with the `lzo` feature enabled, the GPL license applies.

All available for use as:

Expand Down
39 changes: 39 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,42 @@ test_zstd[urls.10K-cramjam] 4,454.6180 (29.67) 6,157
test_zstd[urls.10K-zstd] 4,201.4410 (27.99) 4,969.7730 (12.22) 4,298.6442 (24.33) 103.9367 (6.55) 4,271.2825 (25.25) 69.8585 (202.80) 18;13 232.6315 (0.04) 228 1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```

### LZO

`make bench-lzo`

```bash
---------------------------------------------------------------------------------------------------------- benchmark: 28 tests ----------------------------------------------------------------------------------------------------------
Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_lzo[Mark.Twain-Tom.Sawyer.txt-cramjam] 71.3860 (3.74) 149.9000 (2.88) 76.3107 (3.81) 4.9416 (2.77) 75.4021 (3.83) 3.1543 (19.23) 479;468 13,104.3145 (0.26) 7391 1
test_lzo[Mark.Twain-Tom.Sawyer.txt-lzo] 66.4451 (3.48) 119.3220 (2.30) 69.0805 (3.45) 4.4744 (2.51) 67.8891 (3.45) 0.6841 (4.17) 448;1085 14,475.8556 (0.29) 7197 1
test_lzo[alice29.txt-cramjam] 827.6199 (43.33) 1,195.3199 (23.00) 849.6277 (42.41) 39.4083 (22.12) 834.0060 (42.32) 22.2070 (135.38) 86;92 1,176.9861 (0.02) 1176 1
test_lzo[alice29.txt-lzo] 858.3551 (44.94) 1,068.9519 (20.57) 876.0361 (43.73) 23.2309 (13.04) 864.6360 (43.88) 20.8670 (127.22) 161;72 1,141.5055 (0.02) 1076 1
test_lzo[asyoulik.txt-cramjam] 714.1409 (37.39) 1,070.4539 (20.60) 733.5739 (36.62) 29.1130 (16.34) 721.7090 (36.62) 21.5740 (131.53) 138;86 1,363.1892 (0.03) 1301 1
test_lzo[asyoulik.txt-lzo] 738.7210 (38.67) 1,023.6730 (19.70) 753.5616 (37.62) 21.0062 (11.79) 743.6920 (37.74) 19.9350 (121.53) 173;64 1,327.0315 (0.03) 1330 1
test_lzo[fifty-four-mb-random-cramjam] 27,467.8700 (>1000.0) 28,415.7479 (546.71) 27,741.2799 (>1000.0) 229.6713 (128.94) 27,684.2180 (>1000.0) 301.5934 (>1000.0) 7;2 36.0474 (0.00) 35 1
test_lzo[fifty-four-mb-random-lzo] 54,500.6340 (>1000.0) 55,803.2760 (>1000.0) 54,953.2404 (>1000.0) 385.7472 (216.56) 54,841.4850 (>1000.0) 484.9015 (>1000.0) 5;0 18.1973 (0.00) 19 1
test_lzo[fifty-four-mb-repeating-cramjam] 16,735.6710 (876.17) 35,315.1960 (679.45) 17,499.6351 (873.59) 3,109.0788 (>1000.0) 16,896.5759 (857.43) 147.1995 (897.40) 1;5 57.1440 (0.00) 35 1
test_lzo[fifty-four-mb-repeating-lzo] 28,781.7999 (>1000.0) 31,379.0981 (603.72) 29,118.9751 (>1000.0) 447.9825 (251.50) 29,069.6489 (>1000.0) 295.3380 (>1000.0) 2;2 34.3419 (0.00) 34 1
test_lzo[fireworks.jpeg-cramjam] 29.5170 (1.55) 68.3580 (1.32) 30.3508 (1.52) 2.2480 (1.26) 29.8760 (1.52) 0.1640 (1.0) 501;1528 32,948.1127 (0.66) 14140 1
test_lzo[fireworks.jpeg-lzo] 19.1010 (1.0) 51.9759 (1.0) 20.0319 (1.0) 1.7813 (1.0) 19.7060 (1.0) 0.3839 (2.34) 786;1860 49,920.4164 (1.0) 29558 1
test_lzo[geo.protodata-cramjam] 208.8640 (10.93) 376.4411 (7.24) 216.3516 (10.80) 9.7457 (5.47) 213.1091 (10.81) 2.8603 (17.44) 411;670 4,622.1062 (0.09) 4201 1
test_lzo[geo.protodata-lzo] 198.9040 (10.41) 285.7670 (5.50) 205.2590 (10.25) 8.9124 (5.00) 202.0630 (10.25) 2.1450 (13.08) 444;796 4,871.8945 (0.10) 4333 1
test_lzo[html-cramjam] 229.1009 (11.99) 295.4110 (5.68) 235.0136 (11.73) 8.9620 (5.03) 232.0290 (11.77) 1.7711 (10.80) 307;488 4,255.0721 (0.09) 3083 1
test_lzo[html-lzo] 222.2451 (11.64) 320.2440 (6.16) 227.7268 (11.37) 9.1389 (5.13) 224.6010 (11.40) 1.6597 (10.12) 417;635 4,391.2270 (0.09) 3941 1
test_lzo[html_x_4-cramjam] 908.0720 (47.54) 1,212.6609 (23.33) 926.7142 (46.26) 24.9970 (14.03) 914.4699 (46.41) 22.1749 (135.19) 146;63 1,079.0813 (0.02) 1013 1
test_lzo[html_x_4-lzo] 890.4270 (46.62) 1,128.3380 (21.71) 909.5622 (45.41) 24.7532 (13.90) 897.9515 (45.57) 20.7745 (126.65) 154;85 1,099.4300 (0.02) 1068 1
test_lzo[kppkn.gtb-cramjam] 685.0970 (35.87) 886.8390 (17.06) 702.4909 (35.07) 23.3337 (13.10) 693.5750 (35.20) 20.2102 (123.21) 168;89 1,423.5060 (0.03) 1401 1
test_lzo[kppkn.gtb-lzo] 690.5049 (36.15) 910.5690 (17.52) 705.2198 (35.20) 19.5436 (10.97) 695.0080 (35.27) 19.8465 (120.99) 229;64 1,417.9975 (0.03) 1411 1
test_lzo[lcet10.txt-cramjam] 2,229.8581 (116.74) 2,697.2240 (51.89) 2,272.3683 (113.44) 51.5769 (28.96) 2,257.0855 (114.54) 47.8630 (291.80) 44;15 440.0695 (0.01) 444 1
test_lzo[lcet10.txt-lzo] 2,302.0359 (120.52) 2,882.9101 (55.47) 2,345.1378 (117.07) 49.3783 (27.72) 2,331.6385 (118.32) 50.6985 (309.08) 42;11 426.4142 (0.01) 404 1
test_lzo[paper-100k.pdf-cramjam] 30.6889 (1.61) 70.6050 (1.36) 31.9516 (1.60) 2.8093 (1.58) 31.0800 (1.58) 0.2179 (1.33) 1259;2846 31,297.3057 (0.63) 14775 1
test_lzo[paper-100k.pdf-lzo] 22.9060 (1.20) 70.6300 (1.36) 23.7881 (1.19) 2.1764 (1.22) 23.3080 (1.18) 0.4090 (2.49) 696;1758 42,037.7894 (0.84) 21124 1
test_lzo[plrabn12.txt-cramjam] 2,856.4630 (149.55) 3,262.9370 (62.78) 2,916.4901 (145.59) 65.0193 (36.50) 2,895.8830 (146.95) 61.5970 (375.52) 36;18 342.8779 (0.01) 304 1
test_lzo[plrabn12.txt-lzo] 2,968.7471 (155.42) 3,348.4920 (64.42) 3,021.6969 (150.84) 58.9855 (33.11) 2,999.0551 (152.19) 59.6350 (363.56) 39;16 330.9399 (0.01) 297 1
test_lzo[urls.10K-cramjam] 2,569.2440 (134.51) 3,112.3650 (59.88) 2,618.0809 (130.70) 56.7014 (31.83) 2,598.5630 (131.87) 55.2737 (336.97) 44;15 381.9592 (0.01) 331 1
test_lzo[urls.10K-lzo] 2,547.9870 (133.40) 3,008.4450 (57.88) 2,595.6427 (129.58) 57.5765 (32.32) 2,576.2780 (130.74) 51.2695 (312.56) 41;20 385.2610 (0.01) 324 1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```
21 changes: 21 additions & 0 deletions benchmarks/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,27 @@ def test_zstd(benchmark, file, use_cramjam: bool):
)


@pytest.mark.parametrize(
    "use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "lzo"
)
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_lzo(benchmark, file, use_cramjam: bool):
    """Benchmark an LZO compress/decompress round trip over one fixture file.

    Runs once per (file, implementation) pair: ``use_cramjam=True`` exercises
    ``cramjam.lzo``, ``use_cramjam=False`` exercises the ``lzo`` package.
    """
    # Imported inside the test so the dependency is only needed when this
    # benchmark actually runs.
    import lzo

    payload = file.read_bytes()

    # Both modules are used through the same compress/decompress pair, so pick
    # the module once and issue a single benchmark call.
    codec = cramjam.lzo if use_cramjam else lzo
    benchmark(
        round_trip,
        compress=codec.compress,
        decompress=codec.decompress,
        data=payload,
    )


@profile
def memory_profile():

Expand Down
8 changes: 8 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ pub mod lz4;
pub mod snappy;
pub mod zstd;

#[cfg(feature = "lzo")]
#[cfg(not(target_os = "windows"))]
pub mod lzo;

use pyo3::prelude::*;

use crate::io::{AsBytes, RustyBuffer, RustyFile, RustyNumpyArray, RustyPyByteArray, RustyPyBytes};
Expand Down Expand Up @@ -240,6 +244,10 @@ fn cramjam(py: Python, m: &PyModule) -> PyResult<()> {
make_submodule!(py -> m -> deflate);
make_submodule!(py -> m -> zstd);

#[cfg(feature = "lzo")]
#[cfg(not(target_os = "windows"))]
make_submodule!(py -> m -> lzo);

Ok(())
}

Expand Down
61 changes: 61 additions & 0 deletions src/lzo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//! lzo de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::io::{AsBytes, RustyBuffer};
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::PyResult;

/// Register the LZO functions defined in this file on the Python module `m`.
///
/// Registration order is: `compress`, `decompress`, `compress_into`,
/// `decompress_into`. The first failure is propagated to the caller.
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
    let compress_fn = wrap_pyfunction!(compress, m)?;
    m.add_function(compress_fn)?;

    let decompress_fn = wrap_pyfunction!(decompress, m)?;
    m.add_function(decompress_fn)?;

    let compress_into_fn = wrap_pyfunction!(compress_into, m)?;
    m.add_function(compress_into_fn)?;

    let decompress_into_fn = wrap_pyfunction!(decompress_into, m)?;
    m.add_function(decompress_into_fn)?;

    Ok(())
}

/// LZO decompression
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lzo.decompress(compressed_raw_bytes)
/// ```
#[pyfunction]
#[allow(unused_variables)]
pub fn decompress(data: BytesType, output_len: Option<usize>) -> PyResult<RustyBuffer> {
let output = to_py_err!(DecompressionError -> minilzo3::decompress_vec(data.as_bytes()))?;
Ok(RustyBuffer::from(output))
}

/// LZO compression
///
/// The output follows the header format of `python-lzo`: the first byte indicates whether
/// level 1 compression was used (the default, and the only level implemented here so far),
/// and the next four bytes hold the length of the original, uncompressed input as a
/// big-endian u32.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.lzo.compress(b'some bytes here')
/// ```
#[pyfunction]
// `output_len` is accepted in the signature but never read in this body,
// hence the lint allowance below.
#[allow(unused_variables)]
pub fn compress(data: BytesType, output_len: Option<usize>) -> PyResult<RustyBuffer> {
    // NOTE(review): the `true` flag presumably asks minilzo3 to emit the header
    // described above - confirm against the minilzo3 API.
    to_py_err!(CompressionError -> minilzo3::compress_vec(data.as_bytes(), true))
        .map(RustyBuffer::from)
}

/// Compress raw format directly into an output buffer
#[pyfunction]
pub fn compress_into(input: BytesType, mut output: BytesType) -> PyResult<usize> {
let output = minilzo3::compress(input.as_bytes(), output.as_bytes_mut(), true);
to_py_err!(CompressionError -> output)
}

/// Decompress raw format directly into an output buffer
#[pyfunction]
pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult<usize> {
let output = minilzo3::decompress(input.as_bytes(), output.as_bytes_mut());
to_py_err!(DecompressionError -> output)
}