From 75f510903ea200df583561c22842a25dd344e8f3 Mon Sep 17 00:00:00 2001 From: Marvin Countryman Date: Fri, 29 Mar 2024 19:27:42 -0400 Subject: [PATCH] Re-structure architecture specific implementations - Add `arch` folder to group implementations by target arch - Add `cfg-if` to reduce headaches when reading `cfg` compile time feature gates - Rename `[Ii]mp` -> `[Uu]pdate` --- Cargo.lock | 198 ++++++++++----------------- Cargo.toml | 3 + bench/variants.rs | 54 +++++--- src/arch.rs | 5 + src/{imp => arch/arm}/neon.rs | 0 src/arch/scalar.rs | 30 ++++ src/{imp => arch}/wasm.rs | 86 +++--------- src/arch/x86.rs | 41 ++++++ src/arch/x86/avx2.rs | 131 ++++++++++++++++++ src/{imp => arch/x86}/avx512.rs | 128 ++++-------------- src/arch/x86/sse2.rs | 149 ++++++++++++++++++++ src/arch/x86/ssse3.rs | 135 ++++++++++++++++++ src/imp/avx2.rs | 214 ----------------------------- src/imp/mod.rs | 23 ---- src/imp/scalar.rs | 69 ---------- src/imp/sse2.rs | 233 -------------------------------- src/imp/ssse3.rs | 219 ------------------------------ src/lib.rs | 34 +---- src/update.rs | 36 +++++ 19 files changed, 685 insertions(+), 1103 deletions(-) create mode 100644 src/arch.rs rename src/{imp => arch/arm}/neon.rs (100%) create mode 100644 src/arch/scalar.rs rename src/{imp => arch}/wasm.rs (69%) create mode 100644 src/arch/x86.rs create mode 100644 src/arch/x86/avx2.rs rename src/{imp => arch/x86}/avx512.rs (57%) create mode 100644 src/arch/x86/sse2.rs create mode 100644 src/arch/x86/ssse3.rs delete mode 100644 src/imp/avx2.rs delete mode 100644 src/imp/mod.rs delete mode 100644 src/imp/scalar.rs delete mode 100644 src/imp/sse2.rs delete mode 100644 src/imp/ssse3.rs create mode 100644 src/update.rs diff --git a/Cargo.lock b/Cargo.lock index 2a0790c..a5642be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,9 +16,9 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -29,16 +29,16 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "bitflags" @@ -48,9 +48,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "cast" @@ -111,54 +111,36 @@ dependencies = [ "itertools", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -168,24 +150,24 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -194,9 +176,9 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "hermit-abi" @@ -207,12 +189,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" - [[package]] name = "itertools" version = "0.10.5" @@ -224,15 +200,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] @@ -245,55 +221,36 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.147" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "log" -version = "0.4.19" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.2", - "libc", -] - [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" @@ -385,9 +342,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -395,21 +352,19 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] name = "regex" -version = "1.9.1" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -419,9 +374,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -430,15 +385,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -449,17 +404,11 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "serde" -version = "1.0.175" +version = "1.0.179" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b" +checksum = "0a5bf42b8d227d4abf38a1ddb08602e229108a517cd4e5bb28f9c7eaafdce5c0" [[package]] name = "serde_cbor" @@ -473,9 +422,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.175" +version = "1.0.179" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4" +checksum = "741e124f5485c7e60c03b043f79f320bff3527f4bbf12cf3831750dc46a0ec2c" dependencies = [ "proc-macro2", "quote", @@ -484,9 +433,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "cb0652c533506ad7a2e353cce269330d6afd8bdfb6d75e0ace5b35aacbd7b9e9" dependencies = [ "itoa", "ryu", @@ -499,6 +448,7 @@ version = "0.3.7" dependencies = [ "adler", "adler32", + "cfg-if", "criterion", "rand", ] @@ -541,15 +491,15 @@ checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "walkdir" -version = "2.3.3" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -563,9 +513,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -573,9 +523,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", @@ -588,9 +538,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -598,9 +548,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", @@ -611,15 +561,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", @@ -643,9 +593,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] diff --git a/Cargo.toml b/Cargo.toml index c58dd05..d7c834d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,3 +37,6 @@ criterion = "0.3" # competition adler = "1.0.2" adler32 = "1.2.0" + +[dependencies] +cfg-if = "1.0.0" diff --git a/bench/variants.rs b/bench/variants.rs index aa9e0a6..f4028bd 100644 --- a/bench/variants.rs +++ b/bench/variants.rs @@ -3,57 +3,69 @@ use criterion::{ Criterion, Throughput, }; use rand::{thread_rng, RngCore}; -use simd_adler32::imp::{avx2, avx512, scalar, sse2, ssse3, wasm, Adler32Imp}; +use simd_adler32::{arch::*, update::Adler32Update}; pub fn bench(c: &mut Criterion) { - let mut data = [0; 100_000]; + let mut data = [1; 100_000]; let mut group = c.benchmark_group("variants"); thread_rng().fill_bytes(&mut data[..]); - if let Some(update) = avx512::get_imp() { - bench_variant(&mut group, "avx512", &data, update); - } + cfg_if::cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if let Some(update) = avx2::get_imp() { - bench_variant(&mut group, "avx2", &data, update); - } + if let Some(update) = x86::avx512::get_update_if_supported() { + bench_variant(&mut group, "avx512", &data, update); + } - if let Some(update) = ssse3::get_imp() { - bench_variant(&mut group, "ssse3", &data, update); - } + if let Some(update) = x86::avx2::get_update_if_supported() { + bench_variant(&mut group, "avx2", &data, update); + } - if let Some(update) = sse2::get_imp() { - bench_variant(&mut group, "sse2", &data, update); - } + if let Some(update) = x86::ssse3::get_update_if_supported() { + bench_variant(&mut group, "ssse3", &data, update); + } + + if let Some(update) = x86::sse2::get_update_if_supported() { + bench_variant(&mut group, "sse2", &data, update); + } + + } else if #[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))] { + + if let Some(update) = wasm::get_update_if_supported() { + bench_variant(&mut group, "wasm", &data, update); + } - if let Some(update) = wasm::get_imp() { - bench_variant(&mut group, "wasm", &data, update); + } } bench_variant(&mut group, "scalar", &data, scalar::update); } -fn bench_variant(g: &mut BenchmarkGroup, name: &str, data: &[u8], imp: Adler32Imp) -where +fn bench_variant( + g: &mut BenchmarkGroup, + name: &str, + data: &[u8], + update: Adler32Update, +) where M: Measurement, { g.throughput(Throughput::Bytes(10)).bench_with_input( format!("{}-10b", name), &data[..10], - |b, data| b.iter(|| black_box(imp(1, 0, data))), + |b, data| b.iter(|| black_box(update(1, 0, data))), ); g.throughput(Throughput::Bytes(10_000)).bench_with_input( format!("{}-10k", name), &data[..10_000], - |b, data| b.iter(|| black_box(imp(1, 0, data))), + |b, data| b.iter(|| black_box(update(1, 0, data))), ); g.throughput(Throughput::Bytes(100_000)).bench_with_input( format!("{}-100k", name), &data[..100_000], - |b, data| b.iter(|| black_box(imp(1, 0, data))), + |b, data| b.iter(|| black_box(update(1, 0, data))), ); } diff --git a/src/arch.rs b/src/arch.rs new file mode 100644 index 0000000..79db70c --- /dev/null +++ b/src/arch.rs @@ -0,0 +1,5 @@ +pub mod scalar; +#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))] +pub mod wasm; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub mod x86; diff --git a/src/imp/neon.rs b/src/arch/arm/neon.rs similarity index 100% rename from src/imp/neon.rs rename to src/arch/arm/neon.rs diff --git a/src/arch/scalar.rs b/src/arch/scalar.rs new file mode 100644 index 0000000..da888b9 --- /dev/null +++ b/src/arch/scalar.rs @@ -0,0 +1,30 @@ +const MOD: u32 = 65521; +const NMAX: usize = 5552; + +pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + let mut a = a as u32; + let mut b = b as u32; + + let chunks = data.chunks_exact(NMAX); + let remainder = chunks.remainder(); + + for chunk in chunks { + for byte in chunk { + a = a.wrapping_add(*byte as _); + b = b.wrapping_add(a); + } + + a %= MOD; + b %= MOD; + } + + for byte in remainder { + a = a.wrapping_add(*byte as _); + b = b.wrapping_add(a); + } + + a %= MOD; + b %= MOD; + + (a as u16, b as u16) +} diff --git a/src/imp/wasm.rs b/src/arch/wasm.rs similarity index 69% rename from src/imp/wasm.rs rename to src/arch/wasm.rs index 5a9152b..9b67e82 100644 --- a/src/imp/wasm.rs +++ b/src/arch/wasm.rs @@ -1,20 +1,26 @@ -use super::Adler32Imp; - -/// Resolves update implementation if CPU supports simd128 instructions. -pub fn get_imp() -> Option { - get_imp_inner() +use crate::update::Adler32Update; + +pub fn get_update_if_supported() -> Option { + cfg_if::cfg_if! { + if #[cfg(target_feature = "simd128")] { + Some(|a, b, bytes| unsafe { update(a, b, bytes) }) + } else { + None + } + } } -#[inline] #[cfg(target_feature = "simd128")] -fn get_imp_inner() -> Option { - Some(imp::update) +#[inline] +pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + imp::update(a, b, data) } -#[inline] #[cfg(not(target_feature = "simd128"))] -fn get_imp_inner() -> Option { - None +mod imp { + pub unsafe fn update(_: u16, _: u16, _: &[u8]) -> (u16, u16) { + panic!("Target platform does not support `simd128`") + } } #[cfg(target_feature = "simd128")] @@ -29,13 +35,9 @@ mod imp { #[cfg(target_arch = "wasm64")] use core::arch::wasm64::*; - pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - update_imp(a, b, data) - } - #[inline] #[target_feature(enable = "simd128")] - fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { let mut a = a as u32; let mut b = b as u32; @@ -163,55 +165,3 @@ mod imp { u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) } } - -#[cfg(test)] -mod tests { - use rand::Rng; - - #[test] - fn zeroes() { - assert_sum_eq(&[]); - assert_sum_eq(&[0]); - assert_sum_eq(&[0, 0]); - assert_sum_eq(&[0; 100]); - assert_sum_eq(&[0; 1024]); - assert_sum_eq(&[0; 512 * 1024]); - } - - #[test] - fn ones() { - assert_sum_eq(&[]); - assert_sum_eq(&[1]); - assert_sum_eq(&[1, 1]); - assert_sum_eq(&[1; 100]); - assert_sum_eq(&[1; 1024]); - assert_sum_eq(&[1; 512 * 1024]); - } - - #[test] - fn random() { - let mut random = [0; 512 * 1024]; - rand::thread_rng().fill(&mut random[..]); - - assert_sum_eq(&random[..1]); - assert_sum_eq(&random[..100]); - assert_sum_eq(&random[..1024]); - assert_sum_eq(&random[..512 * 1024]); - } - - /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. - #[test] - fn wiki() { - assert_sum_eq(b"Wikipedia"); - } - - fn assert_sum_eq(data: &[u8]) { - if let Some(update) = super::get_imp() { - let (a, b) = update(1, 0, data); - let left = u32::from(b) << 16 | u32::from(a); - let right = adler::adler32_slice(data); - - assert_eq!(left, right, "len({})", data.len()); - } - } -} diff --git a/src/arch/x86.rs b/src/arch/x86.rs new file mode 100644 index 0000000..9f3d1a0 --- /dev/null +++ b/src/arch/x86.rs @@ -0,0 +1,41 @@ +pub mod avx2; +pub mod avx512; +pub mod sse2; +pub mod ssse3; + +/// A macro to test whether a CPU feature is available on x86/x86-x64 platforms. +/// +/// This macro will attempt to test at runtime if `std` feature is enabled. Otherwise will +/// fallback to target_feature conditional compilation flags. +#[allow(unused_macros)] +macro_rules! is_x86_feature_detected { + ($name:tt) => {{ + #[cfg(feature = "std")] + #[inline(always)] + fn __is_x86_feature_detected() -> bool { + std::is_x86_feature_detected!($name) + } + + #[cfg(all(not(feature = "std"), target_feature = $name))] + #[inline(always)] + fn __is_x86_feature_detected() -> bool { + true + } + + #[cfg(all(not(feature = "std"), not(target_feature = $name)))] + #[inline(always)] + fn __is_x86_feature_detected() -> bool { + false + } + + __is_x86_feature_detected() + }}; +} + +pub(crate) use is_x86_feature_detected; + +#[inline] +#[allow(non_snake_case)] +pub const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 { + ((z << 6) | (y << 4) | (x << 2) | w) as i32 +} diff --git a/src/arch/x86/avx2.rs b/src/arch/x86/avx2.rs new file mode 100644 index 0000000..5e11282 --- /dev/null +++ b/src/arch/x86/avx2.rs @@ -0,0 +1,131 @@ +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use crate::update::Adler32Update; + +const MOD: u32 = 65521; +const NMAX: usize = 5552; +const BLOCK_SIZE: usize = 32; +const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; + +pub fn get_update_if_supported() -> Option { + if super::is_x86_feature_detected!("avx2") { + fn stub(a: u16, b: u16, bytes: &[u8]) -> (u16, u16) { + unsafe { update(a, b, bytes) } + } + + Some(stub) + } else { + None + } +} + +#[inline] +#[target_feature(enable = "avx2")] +pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + let mut a = a as u32; + let mut b = b as u32; + + let chunks = data.chunks_exact(CHUNK_SIZE); + let remainder = chunks.remainder(); + for chunk in chunks { + update_chunk_block(&mut a, &mut b, chunk); + } + + update_block(&mut a, &mut b, remainder); + + (a as u16, b as u16) +} + +#[inline] +unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert_eq!( + chunk.len(), + CHUNK_SIZE, + "Unexpected chunk size (expected {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + reduce_add_blocks(a, b, chunk); + + *a %= MOD; + *b %= MOD; +} + +#[inline] +unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert!( + chunk.len() <= CHUNK_SIZE, + "Unexpected chunk size (expected <= {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + for byte in reduce_add_blocks(a, b, chunk) { + *a += *byte as u32; + *b += *a; + } + + *a %= MOD; + *b %= MOD; +} + +#[inline(always)] +unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { + if chunk.len() < BLOCK_SIZE { + return chunk; + } + + let blocks = chunk.chunks_exact(BLOCK_SIZE); + let blocks_remainder = blocks.remainder(); + + let one_v = _mm256_set1_epi16(1); + let zero_v = _mm256_setzero_si256(); + let weights = get_weights(); + + let mut p_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * blocks.len() as u32) as _); + let mut a_v = _mm256_setzero_si256(); + let mut b_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _); + + for block in blocks { + let block_ptr = block.as_ptr() as *const _; + let block = _mm256_loadu_si256(block_ptr); + + p_v = _mm256_add_epi32(p_v, a_v); + + a_v = _mm256_add_epi32(a_v, _mm256_sad_epu8(block, zero_v)); + let mad = _mm256_maddubs_epi16(block, weights); + b_v = _mm256_add_epi32(b_v, _mm256_madd_epi16(mad, one_v)); + } + + b_v = _mm256_add_epi32(b_v, _mm256_slli_epi32(p_v, 5)); + + *a += reduce_add(a_v); + *b = reduce_add(b_v); + + blocks_remainder +} + +#[inline(always)] +unsafe fn reduce_add(v: __m256i) -> u32 { + let sum = _mm_add_epi32(_mm256_castsi256_si128(v), _mm256_extracti128_si256(v, 1)); + let hi = _mm_unpackhi_epi64(sum, sum); + + let sum = _mm_add_epi32(hi, sum); + let hi = _mm_shuffle_epi32(sum, super::_mm_shuffle(2, 3, 0, 1)); + + let sum = _mm_add_epi32(sum, hi); + + _mm_cvtsi128_si32(sum) as _ +} + +#[inline(always)] +unsafe fn get_weights() -> __m256i { + _mm256_set_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ) +} diff --git a/src/imp/avx512.rs b/src/arch/x86/avx512.rs similarity index 57% rename from src/imp/avx512.rs rename to src/arch/x86/avx512.rs index ebb32fa..449350b 100644 --- a/src/imp/avx512.rs +++ b/src/arch/x86/avx512.rs @@ -1,77 +1,49 @@ -use super::Adler32Imp; +use crate::update::Adler32Update; -/// Resolves update implementation if CPU supports avx512f and avx512bw instructions. -pub fn get_imp() -> Option { - get_imp_inner() -} - -#[inline] -#[cfg(all( - feature = "std", - feature = "nightly", - any(target_arch = "x86", target_arch = "x86_64") -))] -fn get_imp_inner() -> Option { - let has_avx512f = std::is_x86_feature_detected!("avx512f"); - let has_avx512bw = std::is_x86_feature_detected!("avx512bw"); +pub fn get_update_if_supported() -> Option { + let has_avx512f = super::is_x86_feature_detected!("avx512f"); + let has_avx512bw = super::is_x86_feature_detected!("avx512bw"); if has_avx512f && has_avx512bw { - Some(imp::update) + // FIXME: What if runtime support but dev forgot to compile with feature flags? + fn stub(a: u16, b: u16, bytes: &[u8]) -> (u16, u16) { + unsafe { update(a, b, bytes) } + } + + Some(stub) } else { None } } #[inline] -#[cfg(all( - feature = "nightly", - all(target_feature = "avx512f", target_feature = "avx512bw"), - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - Some(imp::update) +pub unsafe fn update(a: u16, b: u16, bytes: &[u8]) -> (u16, u16) { + imp::update(a, b, bytes) } -#[inline] -#[cfg(all( - not(all(feature = "nightly", target_feature = "avx512f", target_feature = "avx512bw")), - not(all( - feature = "std", - feature = "nightly", - any(target_arch = "x86", target_arch = "x86_64") - )) -))] -fn get_imp_inner() -> Option { - None +#[cfg(not(all(feature = "nightly", any(target_arch = "x86", target_arch = "x86_64"))))] +mod imp { + pub unsafe fn update(_: u16, _: u16, _: &[u8]) -> (u16, u16) { + panic!("Target platform does not support `avx512f` and or `avx512bw`") + } } -#[cfg(all( - feature = "nightly", - any(target_arch = "x86", target_arch = "x86_64"), - any( - feature = "std", - all(target_feature = "avx512f", target_feature = "avx512bw") - ) -))] +#[cfg(all(feature = "nightly", any(target_arch = "x86", target_arch = "x86_64")))] mod imp { - const MOD: u32 = 65521; - const NMAX: usize = 5552; - const BLOCK_SIZE: usize = 64; - const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; - #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; - pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - unsafe { update_imp(a, b, data) } - } + const MOD: u32 = 65521; + const NMAX: usize = 5552; + const BLOCK_SIZE: usize = 64; + const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; #[inline] #[target_feature(enable = "avx512f")] #[target_feature(enable = "avx512bw")] - unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { let mut a = a as u32; let mut b = b as u32; @@ -171,7 +143,7 @@ mod imp { let hi = _mm_unpackhi_epi64(sum, sum); let sum = _mm_add_epi32(hi, sum); - let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); + let hi = _mm_shuffle_epi32(sum, crate::x86::_mm_shuffle(2, 3, 0, 1)); let sum = _mm_add_epi32(sum, hi); let sum = _mm_cvtsi128_si32(sum) as _; @@ -188,55 +160,3 @@ mod imp { ) } } - -#[cfg(test)] -mod tests { - use rand::Rng; - - #[test] - fn zeroes() { - assert_sum_eq(&[]); - assert_sum_eq(&[0]); - assert_sum_eq(&[0, 0]); - assert_sum_eq(&[0; 100]); - assert_sum_eq(&[0; 1024]); - assert_sum_eq(&[0; 1024 * 1024]); - } - - #[test] - fn ones() { - assert_sum_eq(&[]); - assert_sum_eq(&[1]); - assert_sum_eq(&[1, 1]); - assert_sum_eq(&[1; 100]); - assert_sum_eq(&[1; 1024]); - assert_sum_eq(&[1; 1024 * 1024]); - } - - #[test] - fn random() { - let mut random = [0; 1024 * 1024]; - rand::thread_rng().fill(&mut random[..]); - - assert_sum_eq(&random[..1]); - assert_sum_eq(&random[..100]); - assert_sum_eq(&random[..1024]); - assert_sum_eq(&random[..1024 * 1024]); - } - - /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. - #[test] - fn wiki() { - assert_sum_eq(b"Wikipedia"); - } - - fn assert_sum_eq(data: &[u8]) { - if let Some(update) = super::get_imp() { - let (a, b) = update(1, 0, data); - let left = u32::from(b) << 16 | u32::from(a); - let right = adler::adler32_slice(data); - - assert_eq!(left, right, "len({})", data.len()); - } - } -} diff --git a/src/arch/x86/sse2.rs b/src/arch/x86/sse2.rs new file mode 100644 index 0000000..05ffd44 --- /dev/null +++ b/src/arch/x86/sse2.rs @@ -0,0 +1,149 @@ +use crate::update::Adler32Update; +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +const MOD: u32 = 65521; +const NMAX: usize = 5552; +const BLOCK_SIZE: usize = 32; +const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; + +pub fn get_update_if_supported() -> Option { + if super::is_x86_feature_detected!("sse2") { + fn stub(a: u16, b: u16, bytes: &[u8]) -> (u16, u16) { + unsafe { update(a, b, bytes) } + } + + Some(stub) + } else { + None + } +} + +#[inline] +#[target_feature(enable = "sse2")] +pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + let mut a = a as u32; + let mut b = b as u32; + + let chunks = data.chunks_exact(CHUNK_SIZE); + let remainder = chunks.remainder(); + for chunk in chunks { + update_chunk_block(&mut a, &mut b, chunk); + } + + update_block(&mut a, &mut b, remainder); + + (a as u16, b as u16) +} + +unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert_eq!( + chunk.len(), + CHUNK_SIZE, + "Unexpected chunk size (expected {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + reduce_add_blocks(a, b, chunk); + + *a %= MOD; + *b %= MOD; +} + +unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert!( + chunk.len() <= CHUNK_SIZE, + "Unexpected chunk size (expected <= {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + for byte in reduce_add_blocks(a, b, chunk) { + *a += *byte as u32; + *b += *a; + } + + *a %= MOD; + *b %= MOD; +} + +#[inline(always)] +unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { + if chunk.len() < BLOCK_SIZE { + return chunk; + } + + let blocks = chunk.chunks_exact(BLOCK_SIZE); + let blocks_remainder = blocks.remainder(); + + let zero_v = _mm_setzero_si128(); + let weight_hi_v = get_weight_hi(); + let weight_lo_v = get_weight_lo(); + + let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _); + let mut a_v = _mm_setzero_si128(); + let mut b_v = _mm_set_epi32(0, 0, 0, *b as _); + + for block in blocks { + let block_ptr = block.as_ptr() as *const _; + let left_v = _mm_loadu_si128(block_ptr); + let right_v = _mm_loadu_si128(block_ptr.add(1)); + + p_v = _mm_add_epi32(p_v, a_v); + + a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v)); + let mad = maddubs(left_v, weight_hi_v); + b_v = _mm_add_epi32(b_v, mad); + + a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v)); + let mad = maddubs(right_v, weight_lo_v); + b_v = _mm_add_epi32(b_v, mad); + } + + b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5)); + + *a += reduce_add(a_v); + *b = reduce_add(b_v); + + blocks_remainder +} + +#[inline(always)] +unsafe fn maddubs(a: __m128i, b: __m128i) -> __m128i { + let a_lo = _mm_unpacklo_epi8(a, _mm_setzero_si128()); + let a_hi = _mm_unpackhi_epi8(a, _mm_setzero_si128()); + + let b_lo = _mm_unpacklo_epi8(b, _mm_setzero_si128()); + let b_hi = _mm_unpackhi_epi8(b, _mm_setzero_si128()); + + let lo = _mm_madd_epi16(a_lo, b_lo); + let hi = _mm_madd_epi16(a_hi, b_hi); + + _mm_add_epi32(lo, hi) +} + +#[inline(always)] +unsafe fn reduce_add(v: __m128i) -> u32 { + let hi = _mm_unpackhi_epi64(v, v); + let sum = _mm_add_epi32(hi, v); + let hi = _mm_shuffle_epi32(sum, super::_mm_shuffle(2, 3, 0, 1)); + + let sum = _mm_add_epi32(sum, hi); + + _mm_cvtsi128_si32(sum) as _ +} + +#[inline(always)] +unsafe fn get_weight_lo() -> __m128i { + _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) +} + +#[inline(always)] +unsafe fn get_weight_hi() -> __m128i { + _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ) +} diff --git a/src/arch/x86/ssse3.rs b/src/arch/x86/ssse3.rs new file mode 100644 index 0000000..b0caba3 --- /dev/null +++ b/src/arch/x86/ssse3.rs @@ -0,0 +1,135 @@ +use crate::update::Adler32Update; +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +const MOD: u32 = 65521; +const NMAX: usize = 5552; +const BLOCK_SIZE: usize = 32; +const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; + +pub fn get_update_if_supported() -> Option { + if std::is_x86_feature_detected!("ssse3") { + fn stub(a: u16, b: u16, bytes: &[u8]) -> (u16, u16) { + unsafe { update(a, b, bytes) } + } + + Some(stub) + } else { + None + } +} + +#[inline] +#[target_feature(enable = "ssse3")] +pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + let mut a = a as u32; + let mut b = b as u32; + + let chunks = data.chunks_exact(CHUNK_SIZE); + let remainder = chunks.remainder(); + for chunk in chunks { + update_chunk_block(&mut a, &mut b, chunk); + } + + update_block(&mut a, &mut b, remainder); + + (a as u16, b as u16) +} + +unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert_eq!( + chunk.len(), + CHUNK_SIZE, + "Unexpected chunk size (expected {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + reduce_add_blocks(a, b, chunk); + + *a %= MOD; + *b %= MOD; +} + +unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert!( + chunk.len() <= CHUNK_SIZE, + "Unexpected chunk size (expected <= {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + for byte in reduce_add_blocks(a, b, chunk) { + *a += *byte as u32; + *b += *a; + } + + *a %= MOD; + *b %= MOD; +} + +#[inline(always)] +unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { + if chunk.len() < BLOCK_SIZE { + return chunk; + } + + let blocks = chunk.chunks_exact(BLOCK_SIZE); + let blocks_remainder = blocks.remainder(); + + let one_v = _mm_set1_epi16(1); + let zero_v = _mm_set1_epi16(0); + let weight_hi_v = get_weight_hi(); + let weight_lo_v = get_weight_lo(); + + let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _); + let mut a_v = _mm_set_epi32(0, 0, 0, 0); + let mut b_v = _mm_set_epi32(0, 0, 0, *b as _); + + for block in blocks { + let block_ptr = block.as_ptr() as *const _; + let left_v = _mm_loadu_si128(block_ptr); + let right_v = _mm_loadu_si128(block_ptr.add(1)); + + p_v = _mm_add_epi32(p_v, a_v); + + a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v)); + let mad = _mm_maddubs_epi16(left_v, weight_hi_v); + b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v)); + + a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v)); + let mad = _mm_maddubs_epi16(right_v, weight_lo_v); + b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v)); + } + + b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5)); + + *a += reduce_add(a_v); + *b = reduce_add(b_v); + + blocks_remainder +} + +#[inline(always)] +unsafe fn reduce_add(v: __m128i) -> u32 { + let hi = _mm_unpackhi_epi64(v, v); + let sum = _mm_add_epi32(hi, v); + let hi = _mm_shuffle_epi32(sum, super::_mm_shuffle(2, 3, 0, 1)); + let sum = _mm_add_epi32(sum, hi); + + _mm_cvtsi128_si32(sum) as _ +} + +#[inline(always)] +unsafe fn get_weight_lo() -> __m128i { + _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) +} + +#[inline(always)] +unsafe fn get_weight_hi() -> __m128i { + _mm_set_epi8( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ) +} diff --git a/src/imp/avx2.rs b/src/imp/avx2.rs deleted file mode 100644 index c16cc99..0000000 --- a/src/imp/avx2.rs +++ /dev/null @@ -1,214 +0,0 @@ -use super::Adler32Imp; - -/// Resolves update implementation if CPU supports avx2 instructions. -pub fn get_imp() -> Option { - get_imp_inner() -} - -#[inline] -#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] -fn get_imp_inner() -> Option { - if std::is_x86_feature_detected!("avx2") { - Some(imp::update) - } else { - None - } -} - -#[inline] -#[cfg(all( - target_feature = "avx2", - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - Some(imp::update) -} - -#[inline] -#[cfg(all( - not(target_feature = "avx2"), - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - None -} - -#[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - any(feature = "std", target_feature = "avx2") -))] -mod imp { - const MOD: u32 = 65521; - const NMAX: usize = 5552; - const BLOCK_SIZE: usize = 32; - const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - unsafe { update_imp(a, b, data) } - } - - #[inline] - #[target_feature(enable = "avx2")] - unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - let mut a = a as u32; - let mut b = b as u32; - - let chunks = data.chunks_exact(CHUNK_SIZE); - let remainder = chunks.remainder(); - for chunk in chunks { - update_chunk_block(&mut a, &mut b, chunk); - } - - update_block(&mut a, &mut b, remainder); - - (a as u16, b as u16) - } - - #[inline] - unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { - debug_assert_eq!( - chunk.len(), - CHUNK_SIZE, - "Unexpected chunk size (expected {}, got {})", - CHUNK_SIZE, - chunk.len() - ); - - reduce_add_blocks(a, b, chunk); - - *a %= MOD; - *b %= MOD; - } - - #[inline] - unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { - debug_assert!( - chunk.len() <= CHUNK_SIZE, - "Unexpected chunk size (expected <= {}, got {})", - CHUNK_SIZE, - chunk.len() - ); - - for byte in reduce_add_blocks(a, b, chunk) { - *a += *byte as u32; - *b += *a; - } - - *a %= MOD; - *b %= MOD; - } - - #[inline(always)] - unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { - if chunk.len() < BLOCK_SIZE { - return chunk; - } - - let blocks = chunk.chunks_exact(BLOCK_SIZE); - let blocks_remainder = blocks.remainder(); - - let one_v = _mm256_set1_epi16(1); - let zero_v = _mm256_setzero_si256(); - let weights = get_weights(); - - let mut p_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * blocks.len() as u32) as _); - let mut a_v = _mm256_setzero_si256(); - let mut b_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _); - - for block in blocks { - let block_ptr = block.as_ptr() as *const _; - let block = _mm256_loadu_si256(block_ptr); - - p_v = _mm256_add_epi32(p_v, a_v); - - a_v = _mm256_add_epi32(a_v, _mm256_sad_epu8(block, zero_v)); - let mad = _mm256_maddubs_epi16(block, weights); - b_v = _mm256_add_epi32(b_v, _mm256_madd_epi16(mad, one_v)); - } - - b_v = _mm256_add_epi32(b_v, _mm256_slli_epi32(p_v, 5)); - - *a += reduce_add(a_v); - *b = reduce_add(b_v); - - blocks_remainder - } - - #[inline(always)] - unsafe fn reduce_add(v: __m256i) -> u32 { - let sum = _mm_add_epi32(_mm256_castsi256_si128(v), _mm256_extracti128_si256(v, 1)); - let hi = _mm_unpackhi_epi64(sum, sum); - - let sum = _mm_add_epi32(hi, sum); - let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); - - let sum = _mm_add_epi32(sum, hi); - - _mm_cvtsi128_si32(sum) as _ - } - - #[inline(always)] - unsafe fn get_weights() -> __m256i { - _mm256_set_epi8( - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, - ) - } -} - -#[cfg(test)] -mod tests { - use rand::Rng; - - #[test] - fn zeroes() { - assert_sum_eq(&[]); - assert_sum_eq(&[0]); - assert_sum_eq(&[0, 0]); - assert_sum_eq(&[0; 100]); - assert_sum_eq(&[0; 1024]); - assert_sum_eq(&[0; 1024 * 1024]); - } - - #[test] - fn ones() { - assert_sum_eq(&[]); - assert_sum_eq(&[1]); - assert_sum_eq(&[1, 1]); - assert_sum_eq(&[1; 100]); - assert_sum_eq(&[1; 1024]); - assert_sum_eq(&[1; 1024 * 1024]); - } - - #[test] - fn random() { - let mut random = [0; 1024 * 1024]; - rand::thread_rng().fill(&mut random[..]); - - assert_sum_eq(&random[..1]); - assert_sum_eq(&random[..100]); - assert_sum_eq(&random[..1024]); - assert_sum_eq(&random[..1024 * 1024]); - } - - /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. - #[test] - fn wiki() { - assert_sum_eq(b"Wikipedia"); - } - - fn assert_sum_eq(data: &[u8]) { - if let Some(update) = super::get_imp() { - let (a, b) = update(1, 0, data); - let left = u32::from(b) << 16 | u32::from(a); - let right = adler::adler32_slice(data); - - assert_eq!(left, right, "len({})", data.len()); - } - } -} diff --git a/src/imp/mod.rs b/src/imp/mod.rs deleted file mode 100644 index 957b50a..0000000 --- a/src/imp/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -pub mod avx2; -pub mod avx512; -pub mod scalar; -pub mod sse2; -pub mod ssse3; -pub mod wasm; - -pub type Adler32Imp = fn(u16, u16, &[u8]) -> (u16, u16); - -#[inline] -#[allow(non_snake_case)] -pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { - ((z << 6) | (y << 4) | (x << 2) | w) as i32 -} - -pub fn get_imp() -> Adler32Imp { - avx512::get_imp() - .or_else(avx2::get_imp) - .or_else(ssse3::get_imp) - .or_else(sse2::get_imp) - .or_else(wasm::get_imp) - .unwrap_or(scalar::update) -} diff --git a/src/imp/scalar.rs b/src/imp/scalar.rs deleted file mode 100644 index 558813e..0000000 --- a/src/imp/scalar.rs +++ /dev/null @@ -1,69 +0,0 @@ -const MOD: u32 = 65521; -const NMAX: usize = 5552; - -pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - let mut a = a as u32; - let mut b = b as u32; - - let chunks = data.chunks_exact(NMAX); - let remainder = chunks.remainder(); - - for chunk in chunks { - for byte in chunk { - a = a.wrapping_add(*byte as _); - b = b.wrapping_add(a); - } - - a %= MOD; - b %= MOD; - } - - for byte in remainder { - a = a.wrapping_add(*byte as _); - b = b.wrapping_add(a); - } - - a %= MOD; - b %= MOD; - - (a as u16, b as u16) -} - -#[cfg(test)] -mod tests { - #[test] - fn zeroes() { - assert_eq!(adler32(&[]), 1); - assert_eq!(adler32(&[0]), 1 | 1 << 16); - assert_eq!(adler32(&[0, 0]), 1 | 2 << 16); - assert_eq!(adler32(&[0; 100]), 0x00640001); - assert_eq!(adler32(&[0; 1024]), 0x04000001); - assert_eq!(adler32(&[0; 1024 * 1024]), 0x00f00001); - } - - #[test] - fn ones() { - assert_eq!(adler32(&[0xff; 1024]), 0x79a6fc2e); - assert_eq!(adler32(&[0xff; 1024 * 1024]), 0x8e88ef11); - } - - #[test] - fn mixed() { - assert_eq!(adler32(&[1]), 2 | 2 << 16); - assert_eq!(adler32(&[40]), 41 | 41 << 16); - - assert_eq!(adler32(&[0xA5; 1024 * 1024]), 0xd5009ab1); - } - - /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. - #[test] - fn wiki() { - assert_eq!(adler32(b"Wikipedia"), 0x11E60398); - } - - fn adler32(data: &[u8]) -> u32 { - let (a, b) = super::update(1, 0, data); - - u32::from(b) << 16 | u32::from(a) - } -} diff --git a/src/imp/sse2.rs b/src/imp/sse2.rs deleted file mode 100644 index b76df52..0000000 --- a/src/imp/sse2.rs +++ /dev/null @@ -1,233 +0,0 @@ -use super::Adler32Imp; - -/// Resolves update implementation if CPU supports sse2 instructions. -pub fn get_imp() -> Option { - get_imp_inner() -} - -#[inline] -#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] -fn get_imp_inner() -> Option { - if std::is_x86_feature_detected!("sse2") { - Some(imp::update) - } else { - None - } -} - -#[inline] -#[cfg(all( - target_feature = "sse2", - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - Some(imp::update) -} - -#[inline] -#[cfg(all( - not(target_feature = "sse2"), - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - None -} - -#[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - any(feature = "std", target_feature = "sse2") -))] -mod imp { - const MOD: u32 = 65521; - const NMAX: usize = 5552; - const BLOCK_SIZE: usize = 32; - const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - unsafe { update_imp(a, b, data) } - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - let mut a = a as u32; - let mut b = b as u32; - - let chunks = data.chunks_exact(CHUNK_SIZE); - let remainder = chunks.remainder(); - for chunk in chunks { - update_chunk_block(&mut a, &mut b, chunk); - } - - update_block(&mut a, &mut b, remainder); - - (a as u16, b as u16) - } - - unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { - debug_assert_eq!( - chunk.len(), - CHUNK_SIZE, - "Unexpected chunk size (expected {}, got {})", - CHUNK_SIZE, - chunk.len() - ); - - reduce_add_blocks(a, b, chunk); - - *a %= MOD; - *b %= MOD; - } - - unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { - debug_assert!( - chunk.len() <= CHUNK_SIZE, - "Unexpected chunk size (expected <= {}, got {})", - CHUNK_SIZE, - chunk.len() - ); - - for byte in reduce_add_blocks(a, b, chunk) { - *a += *byte as u32; - *b += *a; - } - - *a %= MOD; - *b %= MOD; - } - - #[inline(always)] - unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { - if chunk.len() < BLOCK_SIZE { - return chunk; - } - - let blocks = chunk.chunks_exact(BLOCK_SIZE); - let blocks_remainder = blocks.remainder(); - - let zero_v = _mm_setzero_si128(); - let weight_hi_v = get_weight_hi(); - let weight_lo_v = get_weight_lo(); - - let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _); - let mut a_v = _mm_setzero_si128(); - let mut b_v = _mm_set_epi32(0, 0, 0, *b as _); - - for block in blocks { - let block_ptr = block.as_ptr() as *const _; - let left_v = _mm_loadu_si128(block_ptr); - let right_v = _mm_loadu_si128(block_ptr.add(1)); - - p_v = _mm_add_epi32(p_v, a_v); - - a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v)); - let mad = maddubs(left_v, weight_hi_v); - b_v = _mm_add_epi32(b_v, mad); - - a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v)); - let mad = maddubs(right_v, weight_lo_v); - b_v = _mm_add_epi32(b_v, mad); - } - - b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5)); - - *a += reduce_add(a_v); - *b = reduce_add(b_v); - - blocks_remainder - } - - #[inline(always)] - unsafe fn maddubs(a: __m128i, b: __m128i) -> __m128i { - let a_lo = _mm_unpacklo_epi8(a, _mm_setzero_si128()); - let a_hi = _mm_unpackhi_epi8(a, _mm_setzero_si128()); - - let b_lo = _mm_unpacklo_epi8(b, _mm_setzero_si128()); - let b_hi = _mm_unpackhi_epi8(b, _mm_setzero_si128()); - - let lo = _mm_madd_epi16(a_lo, b_lo); - let hi = _mm_madd_epi16(a_hi, b_hi); - - _mm_add_epi32(lo, hi) - } - - #[inline(always)] - unsafe fn reduce_add(v: __m128i) -> u32 { - let hi = _mm_unpackhi_epi64(v, v); - let sum = _mm_add_epi32(hi, v); - let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); - - let sum = _mm_add_epi32(sum, hi); - - _mm_cvtsi128_si32(sum) as _ - } - - #[inline(always)] - unsafe fn get_weight_lo() -> __m128i { - _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - } - - #[inline(always)] - unsafe fn get_weight_hi() -> __m128i { - _mm_set_epi8( - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - ) - } -} - -#[cfg(test)] -mod tests { - use rand::Rng; - - #[test] - fn zeroes() { - assert_sum_eq(&[]); - assert_sum_eq(&[0]); - assert_sum_eq(&[0, 0]); - assert_sum_eq(&[0; 100]); - assert_sum_eq(&[0; 1024]); - assert_sum_eq(&[0; 1024 * 1024]); - } - - #[test] - fn ones() { - assert_sum_eq(&[]); - assert_sum_eq(&[1]); - assert_sum_eq(&[1, 1]); - assert_sum_eq(&[1; 100]); - assert_sum_eq(&[1; 1024]); - assert_sum_eq(&[1; 1024 * 1024]); - } - - #[test] - fn random() { - let mut random = [0; 1024 * 1024]; - rand::thread_rng().fill(&mut random[..]); - - assert_sum_eq(&random[..1]); - assert_sum_eq(&random[..100]); - assert_sum_eq(&random[..1024]); - assert_sum_eq(&random[..1024 * 1024]); - } - - /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. - #[test] - fn wiki() { - assert_sum_eq(b"Wikipedia"); - } - - fn assert_sum_eq(data: &[u8]) { - if let Some(update) = super::get_imp() { - let (a, b) = update(1, 0, data); - let left = u32::from(b) << 16 | u32::from(a); - let right = adler::adler32_slice(data); - - assert_eq!(left, right, "len({})", data.len()); - } - } -} diff --git a/src/imp/ssse3.rs b/src/imp/ssse3.rs deleted file mode 100644 index 2602d47..0000000 --- a/src/imp/ssse3.rs +++ /dev/null @@ -1,219 +0,0 @@ -use super::Adler32Imp; - -/// Resolves update implementation if CPU supports ssse3 instructions. -pub fn get_imp() -> Option { - get_imp_inner() -} - -#[inline] -#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] -fn get_imp_inner() -> Option { - if std::is_x86_feature_detected!("ssse3") { - Some(imp::update) - } else { - None - } -} - -#[inline] -#[cfg(all( - target_feature = "ssse3", - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - Some(imp::update) -} - -#[inline] -#[cfg(all( - not(target_feature = "ssse3"), - not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) -))] -fn get_imp_inner() -> Option { - None -} - -#[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - any(feature = "std", target_feature = "ssse3") -))] -mod imp { - const MOD: u32 = 65521; - const NMAX: usize = 5552; - const BLOCK_SIZE: usize = 32; - const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - unsafe { update_imp(a, b, data) } - } - - #[inline] - #[target_feature(enable = "ssse3")] - unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { - let mut a = a as u32; - let mut b = b as u32; - - let chunks = data.chunks_exact(CHUNK_SIZE); - let remainder = chunks.remainder(); - for chunk in chunks { - update_chunk_block(&mut a, &mut b, chunk); - } - - update_block(&mut a, &mut b, remainder); - - (a as u16, b as u16) - } - - unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { - debug_assert_eq!( - chunk.len(), - CHUNK_SIZE, - "Unexpected chunk size (expected {}, got {})", - CHUNK_SIZE, - chunk.len() - ); - - reduce_add_blocks(a, b, chunk); - - *a %= MOD; - *b %= MOD; - } - - unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { - debug_assert!( - chunk.len() <= CHUNK_SIZE, - "Unexpected chunk size (expected <= {}, got {})", - CHUNK_SIZE, - chunk.len() - ); - - for byte in reduce_add_blocks(a, b, chunk) { - *a += *byte as u32; - *b += *a; - } - - *a %= MOD; - *b %= MOD; - } - - #[inline(always)] - unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { - if chunk.len() < BLOCK_SIZE { - return chunk; - } - - let blocks = chunk.chunks_exact(BLOCK_SIZE); - let blocks_remainder = blocks.remainder(); - - let one_v = _mm_set1_epi16(1); - let zero_v = _mm_set1_epi16(0); - let weight_hi_v = get_weight_hi(); - let weight_lo_v = get_weight_lo(); - - let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _); - let mut a_v = _mm_set_epi32(0, 0, 0, 0); - let mut b_v = _mm_set_epi32(0, 0, 0, *b as _); - - for block in blocks { - let block_ptr = block.as_ptr() as *const _; - let left_v = _mm_loadu_si128(block_ptr); - let right_v = _mm_loadu_si128(block_ptr.add(1)); - - p_v = _mm_add_epi32(p_v, a_v); - - a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v)); - let mad = _mm_maddubs_epi16(left_v, weight_hi_v); - b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v)); - - a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v)); - let mad = _mm_maddubs_epi16(right_v, weight_lo_v); - b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v)); - } - - b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5)); - - *a += reduce_add(a_v); - *b = reduce_add(b_v); - - blocks_remainder - } - - #[inline(always)] - unsafe fn reduce_add(v: __m128i) -> u32 { - let hi = _mm_unpackhi_epi64(v, v); - let sum = _mm_add_epi32(hi, v); - let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); - let sum = _mm_add_epi32(sum, hi); - - _mm_cvtsi128_si32(sum) as _ - } - - #[inline(always)] - unsafe fn get_weight_lo() -> __m128i { - _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - } - - #[inline(always)] - unsafe fn get_weight_hi() -> __m128i { - _mm_set_epi8( - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - ) - } -} - -#[cfg(test)] -mod tests { - use rand::Rng; - - #[test] - fn zeroes() { - assert_sum_eq(&[]); - assert_sum_eq(&[0]); - assert_sum_eq(&[0, 0]); - assert_sum_eq(&[0; 100]); - assert_sum_eq(&[0; 1024]); - assert_sum_eq(&[0; 1024 * 1024]); - } - - #[test] - fn ones() { - assert_sum_eq(&[]); - assert_sum_eq(&[1]); - assert_sum_eq(&[1, 1]); - assert_sum_eq(&[1; 100]); - assert_sum_eq(&[1; 1024]); - assert_sum_eq(&[1; 1024 * 1024]); - } - - #[test] - fn random() { - let mut random = [0; 1024 * 1024]; - rand::thread_rng().fill(&mut random[..]); - - assert_sum_eq(&random[..1]); - assert_sum_eq(&random[..100]); - assert_sum_eq(&random[..1024]); - assert_sum_eq(&random[..1024 * 1024]); - } - - /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. - #[test] - fn wiki() { - assert_sum_eq(b"Wikipedia"); - } - - fn assert_sum_eq(data: &[u8]) { - if let Some(update) = super::get_imp() { - let (a, b) = update(1, 0, data); - let left = u32::from(b) << 16 | u32::from(a); - let right = adler::adler32_slice(data); - - assert_eq!(left, right, "len({})", data.len()); - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 1ee545e..0743d81 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,20 +82,18 @@ feature(stdarch_x86_avx512, avx512_target_feature) )] -#[doc(hidden)] +pub mod arch; pub mod hash; -#[doc(hidden)] -pub mod imp; +pub mod update; -pub use hash::*; -use imp::{get_imp, Adler32Imp}; +use update::Adler32Update; /// An adler32 hash generator type. #[derive(Clone)] pub struct Adler32 { a: u16, b: u16, - update: Adler32Imp, + update: Adler32Update, } impl Adler32 { @@ -129,7 +127,7 @@ impl Adler32 { Self { a: checksum as u16, b: (checksum >> 16) as u16, - update: get_imp(), + update: update::best(), } } @@ -184,7 +182,7 @@ impl Default for Adler32 { Self { a: 1, b: 0, - update: get_imp(), + update: update::best(), } } } @@ -291,23 +289,3 @@ pub mod bufread { } } } - -#[cfg(test)] -mod tests { - #[test] - fn test_from_checksum() { - let buf = b"rust is pretty cool man"; - let sum = 0xdeadbeaf; - - let mut simd = super::Adler32::from_checksum(sum); - let mut adler = adler::Adler32::from_checksum(sum); - - simd.write(buf); - adler.write_slice(buf); - - let simd = simd.finish(); - let scalar = adler.checksum(); - - assert_eq!(simd, scalar); - } -} diff --git a/src/update.rs b/src/update.rs new file mode 100644 index 0000000..438e00a --- /dev/null +++ b/src/update.rs @@ -0,0 +1,36 @@ +/// Return the 16bit Adler-32 checksum pair from the given seed pair and bytes. +pub type Adler32Update = fn(u16, u16, &[u8]) -> (u16, u16); + +/// Returns the [Adler32Update] function that runs best on the target system. +pub fn best() -> Adler32Update { + cfg_if::cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + fn best_for_target_arch() -> Adler32Update { + use crate::arch::x86::*; + use crate::arch::scalar; + + avx512::get_update_if_supported() + .or_else(avx2::get_update_if_supported) + .or_else(ssse3::get_update_if_supported) + .or_else(sse2::get_update_if_supported) + .unwrap_or(scalar::update) + } + } else if #[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))] { + fn best_for_target_arch() -> Adler32Update { + use crate::arch::wasm; + use crate::arch::scalar; + + wasm::get_update_if_supported() + .unwrap_or(scalar::update) + } + } else { + fn best_for_target_arch() -> Adler32Update { + use crate::arch::scalar; + + scalar::update + } + } + } + + best_for_target_arch() +}