diff --git a/src/include/duckdb/common/bitpacking.hpp b/src/include/duckdb/common/bitpacking.hpp index 185ece082d28..44c7049dfc27 100644 --- a/src/include/duckdb/common/bitpacking.hpp +++ b/src/include/duckdb/common/bitpacking.hpp @@ -15,18 +15,8 @@ #include "duckdb/common/limits.hpp" #include "duckdb/common/numeric_utils.hpp" - -#include - namespace duckdb { - -// ! TEMPORARY STREAM OVERLOAD -std::ostream& operator<<(std::ostream& stream, hugeint_t value) { - stream << value.ToString(); - return stream; -} - using bitpacking_width_t = uint8_t; class BitpackingPrimitives { @@ -220,252 +210,8 @@ class BitpackingPrimitives { return width; } - template - static void UnpackSingleOut128(const uint32_t *__restrict &in, T *__restrict out, uint16_t delta, uint16_t shr) { - if (delta + shr < 32) { - *out = ((static_cast(*in)) >> shr) % (T(1) << delta); - } - - else if (delta + shr >= 32 && delta + shr < 64) { - *out = static_cast(*in) >> shr; - ++in; - - if (delta + shr > 32) { - uint16_t NEXT_SHR = shr + delta - 32; - - *out |= static_cast((*in) % (1U << NEXT_SHR)) << (32 - shr); - } - } - - else if (delta + shr >= 64 && delta + shr < 96) { - *out = static_cast(*in) >> shr; - ++in; - - *out |= static_cast(*in) << (32 - shr); - ++in; - - if (delta + shr > 64) { - uint8_t NEXT_SHR = delta + shr - 64; - *out |= static_cast((*in) % (1U << NEXT_SHR)) << (64 - shr); - } - } - - else if (delta + shr >= 96) { - *out = static_cast(*in) >> shr; - ++in; - - *out |= static_cast(*in) << (32 - shr); - ++in; - - *out |= static_cast(*in) << (64 - shr); - ++in; - - if (delta + shr > 96) { - uint8_t NEXT_SHR = delta + shr - 96; - *out |= static_cast((*in) % (1U << NEXT_SHR)) << (96 - shr); - } - } - } - - template - static void PackSingleIn128(const T in, uint32_t *__restrict &out, uint16_t delta, uint16_t shl, T mask) { - if (delta + shl < 32) { - - if (shl == 0) { - *out = static_cast(in & mask); - } else { - *out |= static_cast((in & mask) << shl); - } - - } - else if (delta + shl >= 32 
&& delta + shl < 64) { - - if (shl == 0) { - *out = static_cast(in & mask); - } else { - *out |= static_cast((in & mask) << shl); - } - - ++out; - - if (delta + shl > 32) { - *out = static_cast((in & mask) >> (32 - shl)); - } - - } - - else if (delta + shl >= 64 && delta + shl < 96) { - - if (shl == 0) { - *out = static_cast(in & mask); - } else { - *out |= static_cast(in << shl); - } - ++out; - - *out = static_cast((in & mask) >> (32 - shl)); - ++out; - - if (delta + shl > 64) { - *out = static_cast((in & mask) >> (64 - shl)); - } - } - - else if (delta + shl >= 96) { - if (shl == 0) { - *out = static_cast(in & mask); - } else { - *out |= static_cast(in << shl); - } - ++out; - - *out = static_cast((in & mask) >> (32 - shl)); - ++out; - - *out = static_cast((in & mask) >> (64 - shl)); - ++out; - - if (delta + shl > 96) { - *out = static_cast((in & mask) >> (96 - shl)); - } - } - } - - - // Custom packing for hugeints - // DELTA = width - template - static void UnpackSingle(const uint32_t *__restrict &in, T *__restrict out, uint16_t delta, uint16_t oindex) { - - std::cout << "Unpacking... 
with DELTA: " << (uint32_t)delta << ", SHR: " - << (uint32_t)((delta * oindex) % 32) << ", DELTA+SHR: " << (uint32_t)(delta + (delta * oindex) % 32) << std::endl; - - - if (oindex == 31) { - UnpackLast(in, out, delta); - } else { - UnpackSingleOut128(in, out + oindex, delta, (delta * oindex) % 32); - } - } - - template - static void PackSingle(const T *__restrict in, uint32_t *__restrict &out, uint16_t delta, uint16_t oindex) { - - std::cout << "Packing " << in[oindex] << " with DELTA: " << (uint32_t)delta << ", SHL: " - << (uint32_t)((delta * oindex) % 32) << ", MASK: " << ((T(1) << delta) - 1) << ", DELTA+SHL: " << (uint32_t)(delta + (delta * oindex) % 32) << std::endl; - - if (oindex == 31) { - PackLast(in, out, delta); - } else { - PackSingleIn128(in[oindex], out, delta, (delta * oindex) % 32, (T(1) << delta) - 1); - } - } - - // Final index (31) - template - static void UnpackLast(const uint32_t *__restrict &in, T *__restrict out, uint16_t delta) { - uint16_t shift = (delta * 31) % 32; - out[31] = (*in) >> shift; - if (delta > 32) { - ++in; - out[31] |= static_cast(*in) << (32 - shift); - } - if (delta > 64) { - ++in; - out[31] |= static_cast(*in) << (64 - shift); - } - if (delta > 96) { - ++in; - out[31] |= static_cast(*in) << (96 - shift); - } - } - - template - static void PackLast(const T *__restrict in, uint32_t *__restrict out, uint16_t delta) { - uint16_t shift = (delta * 31) % 32; - *out |= static_cast(in[31] << shift); // What should happen here? - if (delta > 32) { - ++out; - *out = static_cast(in[31] >> (32 - shift)); - } - if (delta > 64) { - ++out; - *out = static_cast(in[31] >> (64 - shift)); - } - if (delta > 96) { - ++out; - *out = static_cast(in[31] >> (96 - shift)); - } - - } - - template - static void PackHugeint(const T *__restrict in, uint32_t *__restrict out, bitpacking_width_t width) { - - if (width == 0) { - return ; - } - - // width 32 - - //? Special cases at certain widths? 
- if (width == 64) { - for (int i = 0; i < 32; ++i) { - out[2 * i] = static_cast(in[i]); - out[2 * i + 1] = static_cast(in[i] >> 32); - } - return ; - } - - // width 96 - - // width 128 - - for (idx_t oindex = 0; oindex < BITPACKING_ALGORITHM_GROUP_SIZE; ++oindex) { - PackSingle(in, out, width, oindex); - - std::cout << "Packed " << in[oindex] << std::endl; // STREAM OVERLOAD - - } - } - - template - static void UnPackHugeint(const uint32_t *__restrict in, T *__restrict out, bitpacking_width_t width) { - - if (width == 0) { - for (uint32_t i = 0; i < 32; ++i) { - *(out++) = 0; - } - return ; - } - - if (width == 64) { - for (int k = 0; k < 32; ++k) { - out[k] = in[k * 2]; - out[k] |= static_cast(in[k * 2 + 1]) << 32; - } - return ; - } - - //? Special cases at certain widths? - - for (idx_t oindex = 0; oindex < BITPACKING_ALGORITHM_GROUP_SIZE; ++oindex) { - UnpackSingle(in, out, width, oindex); - - std::cout << "Unpacked " << out[oindex] << std::endl; - - } - - // UnpackLast(in, out, width); - } - - template static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) { - - - std::cout << "PackGroup width: " << (uint32_t)width << std::endl; - // packing reinterprets the integral type as it's unsigned counterpart, // except for hugeints which are exclusively signed (for now) PackGroupImpl(dst, reinterpret_cast::type *>(values), width); @@ -475,41 +221,10 @@ class BitpackingPrimitives { static void PackGroupImpl(data_ptr_t dst, T *values, bitpacking_width_t width) { throw InternalException("Unsupported type for bitpacking"); } - template <> - void PackGroupImpl(data_ptr_t dst, uint8_t *values, bitpacking_width_t width) { - duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); - } - template <> - void PackGroupImpl(data_ptr_t dst, uint16_t *values, bitpacking_width_t width) { - duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); - } - template <> - void PackGroupImpl(data_ptr_t dst, uint32_t 
*values, bitpacking_width_t width) { - duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); - } - template <> - void PackGroupImpl(data_ptr_t dst, uint64_t *values, bitpacking_width_t width) { - duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); - } - template <> - void PackGroupImpl(data_ptr_t dst, hugeint_t *values, bitpacking_width_t width) { - - - std::cout << "Packing these values:" << std::endl; - for (idx_t i = 0; i < BITPACKING_ALGORITHM_GROUP_SIZE; ++i) { - std::cout << '\t' << static_cast(values[i]).ToString() << std::endl; - } - - - PackHugeint(values, reinterpret_cast(dst), static_cast(width)); - } template static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width, bool skip_sign_extension = false) { - - std::cout << "UnPackGroup width: " << (uint32_t)width << std::endl; - UnPackGroupImpl(reinterpret_cast::type *>(dst), src, width); if (NumericLimits::IsSigned() && !skip_sign_extension && width > 0 && width < sizeof(T) * 8) { @@ -522,27 +237,6 @@ class BitpackingPrimitives { throw InternalException("Unsupported type for bitpacking"); } - template <> - void UnPackGroupImpl(uint8_t *dst, data_ptr_t src, bitpacking_width_t width) { - duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); - } - template <> - void UnPackGroupImpl(uint16_t *dst, data_ptr_t src, bitpacking_width_t width) { - duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); - } - template <> - void UnPackGroupImpl(uint32_t *dst, data_ptr_t src, bitpacking_width_t width) { - duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); - } - template <> - void UnPackGroupImpl(uint64_t *dst, data_ptr_t src, bitpacking_width_t width) { - duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); - } - template <> - void UnPackGroupImpl(hugeint_t *dst, data_ptr_t src, bitpacking_width_t width) { - 
UnPackHugeint(reinterpret_cast(src), dst, width); - } - }; } // namespace duckdb diff --git a/src/include/duckdb/common/bitpacking_hugeint.hpp b/src/include/duckdb/common/bitpacking_hugeint.hpp new file mode 100644 index 000000000000..99e47e793822 --- /dev/null +++ b/src/include/duckdb/common/bitpacking_hugeint.hpp @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/bitpacking_hugeint.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/bitpacking.hpp" + +namespace duckdb { + +struct HugeIntPacker { + static void Pack(const hugeint_t *__restrict in, uint32_t *__restrict out, bitpacking_width_t width); + static void Unpack(const uint32_t *__restrict in, hugeint_t *__restrict out, bitpacking_width_t width); +}; + + +} // namespace duckdb diff --git a/src/storage/compression/CMakeLists.txt b/src/storage/compression/CMakeLists.txt index 6851e0c6288a..5156569bba14 100644 --- a/src/storage/compression/CMakeLists.txt +++ b/src/storage/compression/CMakeLists.txt @@ -11,6 +11,7 @@ add_library_unity( uncompressed.cpp validity_uncompressed.cpp bitpacking.cpp + bitpacking_hugeint.cpp patas.cpp fsst.cpp) set(ALL_OBJECT_FILES diff --git a/src/storage/compression/bitpacking.cpp b/src/storage/compression/bitpacking.cpp index a93276d1da30..472f13199644 100644 --- a/src/storage/compression/bitpacking.cpp +++ b/src/storage/compression/bitpacking.cpp @@ -1,5 +1,6 @@ #include "duckdb/common/bitpacking.hpp" +#include "duckdb/common/bitpacking_hugeint.hpp" #include "duckdb/common/limits.hpp" #include "duckdb/function/compression/compression.hpp" #include "duckdb/function/compression_function.hpp" @@ -237,7 +238,9 @@ struct BitpackingState { if (can_do_delta) { if (maximum_delta == minimum_delta && mode != BitpackingMode::FOR && mode != BitpackingMode::DELTA_FOR) { - idx_t frame_of_reference = 
static_cast(compression_buffer[0]); + // FOR needs to be T (considering hugeint is bigger than idx_t) + T frame_of_reference = compression_buffer[0]; + OP::WriteConstantDelta(maximum_delta, static_cast(frame_of_reference), compression_buffer_idx, compression_buffer, compression_buffer_validity, data_ptr); total_size += sizeof(T) + sizeof(T) + sizeof(bitpacking_metadata_encoded_t); @@ -961,4 +964,50 @@ bool BitpackingFun::TypeIsSupported(PhysicalType type) { } } +//===--------------------------------------------------------------------===// +// (Un)pack Group +//===--------------------------------------------------------------------===// + +template <> +void BitpackingPrimitives::UnPackGroupImpl(uint8_t *dst, data_ptr_t src, bitpacking_width_t width) { + duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); +} +template <> +void BitpackingPrimitives::UnPackGroupImpl(uint16_t *dst, data_ptr_t src, bitpacking_width_t width) { + duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); +} +template <> +void BitpackingPrimitives::UnPackGroupImpl(uint32_t *dst, data_ptr_t src, bitpacking_width_t width) { + duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); +} +template <> +void BitpackingPrimitives::UnPackGroupImpl(uint64_t *dst, data_ptr_t src, bitpacking_width_t width) { + duckdb_fastpforlib::fastunpack(reinterpret_cast(src), dst, static_cast(width)); +} +template <> +void BitpackingPrimitives::UnPackGroupImpl(hugeint_t *dst, data_ptr_t src, bitpacking_width_t width) { + HugeIntPacker::Unpack(reinterpret_cast(src), dst, width); +} + +template <> +void BitpackingPrimitives::PackGroupImpl(data_ptr_t dst, uint8_t *values, bitpacking_width_t width) { + duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); +} +template <> +void BitpackingPrimitives::PackGroupImpl(data_ptr_t dst, uint16_t *values, bitpacking_width_t width) { + duckdb_fastpforlib::fastpack(values, 
reinterpret_cast(dst), static_cast(width)); +} +template <> +void BitpackingPrimitives::PackGroupImpl(data_ptr_t dst, uint32_t *values, bitpacking_width_t width) { + duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); +} +template <> +void BitpackingPrimitives::PackGroupImpl(data_ptr_t dst, uint64_t *values, bitpacking_width_t width) { + duckdb_fastpforlib::fastpack(values, reinterpret_cast(dst), static_cast(width)); +} +template <> +void BitpackingPrimitives::PackGroupImpl(data_ptr_t dst, hugeint_t *values, bitpacking_width_t width) { + HugeIntPacker::Pack(values, reinterpret_cast(dst), width); +} + } // namespace duckdb diff --git a/src/storage/compression/bitpacking_hugeint.cpp b/src/storage/compression/bitpacking_hugeint.cpp new file mode 100644 index 000000000000..2dcab5afe49a --- /dev/null +++ b/src/storage/compression/bitpacking_hugeint.cpp @@ -0,0 +1,329 @@ +#include "duckdb/common/bitpacking.hpp" +#include "duckdb/common/bitpacking_hugeint.hpp" + +#include + +namespace duckdb { +// ! 
TEMPORARY STREAM OVERLOAD +std::ostream& operator<<(std::ostream& stream, hugeint_t value) { + stream << value.ToString(); + return stream; +} + +static void UnpackSingle(const uint32_t *__restrict &in, hugeint_t *__restrict out, uint16_t delta, uint16_t shr) { + if (delta + shr < 32) { + *out = ((static_cast(*in)) >> shr) % (hugeint_t(1) << delta); + } + + else if (delta + shr >= 32 && delta + shr < 64) { + *out = static_cast(*in) >> shr; + ++in; + + if (delta + shr > 32) { + const uint16_t NEXT_SHR = shr + delta - 32; + + *out |= static_cast((*in) % (1U << NEXT_SHR)) << (32 - shr); + } + } + + else if (delta + shr >= 64 && delta + shr < 96) { + *out = static_cast(*in) >> shr; + ++in; + + *out |= static_cast(*in) << (32 - shr); + ++in; + + if (delta + shr > 64) { + const uint16_t NEXT_SHR = delta + shr - 64; + *out |= static_cast((*in) % (1U << NEXT_SHR)) << (64 - shr); + } + } + + else if (delta + shr >= 96 && delta + shr < 128) { + *out = static_cast(*in) >> shr; + ++in; + + *out |= static_cast(*in) << (32 - shr); + ++in; + + *out |= static_cast(*in) << (64 - shr); + ++in; + + if (delta + shr > 96) { + const uint16_t NEXT_SHR = delta + shr - 96; + *out |= static_cast((*in) % (1U << NEXT_SHR)) << (96 - shr); + } + } + + else if (delta + shr >= 128) { + *out = static_cast(*in) >> shr; + ++in; + + *out |= static_cast(*in) << (32 - shr); + ++in; + + *out |= static_cast(*in) << (64 - shr); + ++in; + + *out |= static_cast(*in) << (96 - shr); + ++in; + + if (delta + shr > 128) { + const uint16_t NEXT_SHR = delta + shr - 128; + *out |= static_cast((*in) % (1U << NEXT_SHR)) << (128 - shr); + } + } + +} + + +static void PackSingle(const hugeint_t in, uint32_t *__restrict &out, uint16_t delta, uint16_t shl, hugeint_t mask) { + if (delta + shl < 32) { + + if (shl == 0) { + *out = static_cast(in & mask); + } else { + *out |= static_cast((in & mask) << shl); + } + + } + else if (delta + shl >= 32 && delta + shl < 64) { + + if (shl == 0) { + *out = static_cast(in & mask); + } 
else { + *out |= static_cast((in & mask) << shl); + } + + ++out; + + if (delta + shl > 32) { + *out = static_cast((in & mask) >> (32 - shl)); + } + + } + + else if (delta + shl >= 64 && delta + shl < 96) { + + if (shl == 0) { + *out = static_cast(in & mask); + } else { + *out |= static_cast(in << shl); + } + ++out; + + *out = static_cast((in & mask) >> (32 - shl)); + ++out; + + if (delta + shl > 64) { + *out = static_cast((in & mask) >> (64 - shl)); + } + } + + else if (delta + shl >= 96 && delta + shl < 128) { + if (shl == 0) { + *out = static_cast(in & mask); + } else { + *out |= static_cast(in << shl); + } + ++out; + + *out = static_cast((in & mask) >> (32 - shl)); + ++out; + + *out = static_cast((in & mask) >> (64 - shl)); + ++out; + + if (delta + shl > 96) { + *out = static_cast((in & mask) >> (96 - shl)); + } + } + + else if (delta + shl >= 128) { + if (shl == 0) { + *out = static_cast(in & mask); + } else { + *out |= static_cast(in << shl); + } + ++out; + + *out = static_cast((in & mask) >> (32 - shl)); + ++out; + + *out = static_cast((in & mask) >> (64 - shl)); + ++out; + + *out = static_cast((in & mask) >> (96 - shl)); + ++out; + + if (delta + shl > 128) { + *out = static_cast((in & mask) >> (128 - shl)); + } + } + +} + +// Custom packing for hugeints +// DELTA = width +// static void UnpackSingle(const uint32_t *__restrict &in, hugeint_t *__restrict out, uint16_t delta, uint16_t oindex) { + +// std::cout << "Unpacking... 
with DELTA: " << (uint32_t)delta << ", SHR: " +// << (uint32_t)((delta * oindex) % 32) << ", DELTA+SHR: " << (uint32_t)(delta + (delta * oindex) % 32) << std::endl; + +// UnpackSingle(in, out + oindex, delta, (delta * oindex) % 32); +// } + +// static void PackSingle(const hugeint_t *__restrict in, uint32_t *__restrict &out, uint16_t delta, uint16_t oindex) { + +// std::cout << "Packing " << in[oindex] << " with DELTA: " << (uint32_t)delta << ", SHL: " +// << (uint32_t)((delta * oindex) % 32) << ", MASK: " << ((hugeint_t(1) << delta) - 1) << ", DELTA+SHL: " << (uint32_t)(delta + (delta * oindex) % 32) << std::endl; + +// PackSingle(in[oindex], out, delta, (delta * oindex) % 32, (hugeint_t(1) << delta) - 1); +// } + +static void UnpackLast(const uint32_t *__restrict &in, hugeint_t *__restrict out, uint16_t delta) { + uint16_t shift = (delta * 31) % 32; + out[31] = (*in) >> shift; + if (delta > 32) { + ++in; + out[31] |= static_cast(*in) << (32 - shift); + } + if (delta > 64) { + ++in; + out[31] |= static_cast(*in) << (64 - shift); + } + if (delta > 96) { + ++in; + out[31] |= static_cast(*in) << (96 - shift); + } +} + +static void PackLast(const hugeint_t *__restrict in, uint32_t *__restrict out, uint16_t delta) { + uint16_t shift = (delta * 31) % 32; + *out |= static_cast(in[31] << shift); + if (delta > 32) { + ++out; + *out = static_cast(in[31] >> (32 - shift)); + } + if (delta > 64) { + ++out; + *out = static_cast(in[31] >> (64 - shift)); + } + if (delta > 96) { + ++out; + *out = static_cast(in[31] >> (96 - shift)); + } + +} + +static void PackDelta32(const hugeint_t *__restrict in, uint32_t *__restrict out) { + for (uint8_t i = 0; i < 32; ++i) { + out[i] = static_cast(in[i]); + } +} + +static void PackDelta64(const hugeint_t *__restrict in, uint32_t *__restrict out) { + for (uint8_t i = 0; i < 32; ++i) { + out[2 * i] = static_cast(in[i]); + out[2 * i + 1] = static_cast(in[i] >> 32); + } +} + +static void PackDelta96(const hugeint_t *__restrict in, uint32_t 
*__restrict out) { + for (uint8_t i = 0; i < 32; ++i) { + out[3 * i] = static_cast(in[i]); + out[3 * i + 1] = static_cast(in[i] >> 32); + out[3 * i + 2] = static_cast(in[i] >> 64); + } +} + +static void PackDelta128(const hugeint_t *__restrict in, uint32_t *__restrict out) { + for (uint8_t i = 0; i < 32; ++i) { + out[4 * i] = static_cast(in[i]); + out[4 * i + 1] = static_cast(in[i] >> 32); + out[4 * i + 2] = static_cast(in[i] >> 64); + out[4 * i + 3] = static_cast(in[i] >> 96); + } +} + +static void UnpackDelta0(const uint32_t *__restrict in, hugeint_t *__restrict out) { + for (uint8_t i = 0; i < 32; ++i) { + *(out++) = 0; + } +} + +static void UnpackDelta32(const uint32_t *__restrict in, hugeint_t *__restrict out) { + for (uint8_t k = 0; k < 32; ++k) { + out[k] = in[k]; + } +} + +static void UnpackDelta64(const uint32_t *__restrict in, hugeint_t *__restrict out) { + for (uint8_t k = 0; k < 32; ++k) { + out[k] = in[k * 2]; + out[k] |= hugeint_t(in[k * 2 + 1]) << 32; + } +} + +static void UnpackDelta96(const uint32_t *__restrict in, hugeint_t *__restrict out) { + for (uint8_t k = 0; k < 32; ++k) { + out[k] = in[k * 3]; + out[k] |= hugeint_t(in[k * 3 + 1]) << 32; + out[k] |= hugeint_t(in[k * 3 + 2]) << 64; + } +} + +static void UnpackDelta128(const uint32_t *__restrict in, hugeint_t *__restrict out) { + for (uint8_t k = 0; k < 32; ++k) { + out[k] = in[k * 4]; + out[k] |= hugeint_t(static_cast(in[k * 4 + 1])) << 32; + out[k] |= hugeint_t(static_cast(in[k * 4 + 2])) << 64; + out[k] |= hugeint_t(static_cast(in[k * 4 + 3])) << 96; + } +} + +void HugeIntPacker::Pack(const hugeint_t *__restrict in, uint32_t *__restrict out, bitpacking_width_t width) { + // std::cout << "packing with WIDTH: " << (uint32_t)width << std::endl; + + switch (width) { + case 0: return ; + case 32: PackDelta32(in, out); return ; + case 64: PackDelta64(in, out); return ; + case 96: PackDelta96(in, out); return ; + case 128: PackDelta128(in, out); return ; + default: break ; + } + for (idx_t oindex 
= 0; oindex < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE - 1; ++oindex) { + + // std::cout << "Packing " << in[oindex] << " with DELTA: " << (uint32_t)width << ", SHL: " + // << (uint32_t)((width * oindex) % 32) << ", MASK: " << ((hugeint_t(1) << width) - 1) << ", width+SHL: " << (uint32_t)(width + (width * oindex) % 32) << std::endl; + + PackSingle(in[oindex], out, width, (width * oindex) % 32, (hugeint_t(1) << width) - 1); + + // std::cout << "Packed " << in[oindex] << std::endl; // STREAM OVERLOAD + + } + PackLast(in, out, width); +} + +void HugeIntPacker::Unpack(const uint32_t *__restrict in, hugeint_t *__restrict out, bitpacking_width_t width) { + // std::cout << "unpacking with WIDTH: " << (uint32_t)width << std::endl; + + switch (width) { + case 0: UnpackDelta0(in, out); return ; + case 32: UnpackDelta32(in, out); return ; + case 64: UnpackDelta64(in, out); return ; + case 96: UnpackDelta96(in, out); return ; + case 128: UnpackDelta128(in, out); return ; + default: break ; + } + for (idx_t oindex = 0; oindex < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE - 1; ++oindex) { + UnpackSingle(in, out + oindex, width, (width * oindex) % 32); + + // std::cout << "Unpacked " << out[oindex] << std::endl; + + } + UnpackLast(in, out, width); +} + +} // namespace duckdb diff --git a/test/sql/storage/compression/bitpacking/bitpacking_compression_hugeint.test_coverage b/test/sql/storage/compression/bitpacking/bitpacking_compression_hugeint.test_coverage deleted file mode 100644 index 5164f3f71d24..000000000000 --- a/test/sql/storage/compression/bitpacking/bitpacking_compression_hugeint.test_coverage +++ /dev/null @@ -1,234 +0,0 @@ -# name: test/sql/storage/compression/bitpacking/bitpacking_compression_hugeint.test_coverage -# description: Assert bitpacking compression ratio is within reasonable margins for each mode -# group: [bitpacking] - -# load the DB from disk -load __TEST_DIR__/test_bitpacking.db - -#### CONSTANT MODE Compression ratio calculation: 
- -statement ok -PRAGMA force_compression='bitpacking' - -statement ok -PRAGMA force_bitpacking_mode='constant' - -statement ok -CREATE TABLE test_bitpacked AS SELECT (i//119000::HUGEINT)::HUGEINT AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -statement ok -PRAGMA force_compression='uncompressed' - -statement ok -CREATE TABLE test_uncompressed AS SELECT i::HUGEINT FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -query I -SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking'; ----- - -query I -SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed'; ----- - -query II -select (uncompressed::FLOAT / bitpacked::FLOAT) > 700, (uncompressed::FLOAT / bitpacked::FLOAT) < 1000 FROM ( - select - (select count(distinct block_id) from pragma_storage_info('test_bitpacked') where segment_type not in('VARCHAR', 'VALIDITY')) as bitpacked, - (select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed -) ----- -True True - -statement ok -drop table test_bitpacked; -drop table test_uncompressed; - -#### CONSTANT DELTA MODE Compression ratio calculation: - -statement ok -PRAGMA force_compression='bitpacking' - -statement ok -PRAGMA force_bitpacking_mode='constant_delta' - -statement ok -CREATE TABLE test_bitpacked AS SELECT i::HUGEINT AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -statement ok -PRAGMA force_compression='uncompressed' - -statement ok -CREATE TABLE test_uncompressed AS SELECT i::HUGEINT AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -query I -SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking'; ----- - -query I -SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE 
segment_type != 'VALIDITY' AND compression != 'Uncompressed'; ----- - -statement ok -checkpoint - -query II -select (uncompressed::FLOAT / bitpacked::FLOAT) > 600, (uncompressed::FLOAT / bitpacked::FLOAT) < 800 FROM ( - select - (select count(distinct block_id) from pragma_storage_info('test_bitpacked') where segment_type not in('VARCHAR', 'VALIDITY')) as bitpacked, - (select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed -) ----- -True True - -statement ok -drop table test_bitpacked; -drop table test_uncompressed; - -#### DELTA FOR MODE Compression ratio calculation: - -statement ok -PRAGMA force_compression='bitpacking' - -statement ok -PRAGMA force_bitpacking_mode='delta_for' - -statement ok -CREATE TABLE test_bitpacked AS SELECT i//2::HUGEINT AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -statement ok -PRAGMA force_compression='uncompressed' - -statement ok -CREATE TABLE test_uncompressed AS SELECT i AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -query I -SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking'; ----- - -query I -SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed'; ----- - -statement ok -checkpoint - -# compression of hugeints isnt as efficient as others with delta_for -query II -select (uncompressed::FLOAT / bitpacked::FLOAT) > 20, (uncompressed::FLOAT / bitpacked::FLOAT) < 60 FROM ( - select - (select count(distinct block_id) from pragma_storage_info('test_bitpacked') where segment_type not in('VARCHAR', 'VALIDITY')) as bitpacked, - (select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed -) ----- -True True - -statement ok -drop table test_bitpacked; -drop table 
test_uncompressed; - -# FOR MODE Compression ratio calculation: - -statement ok -PRAGMA force_compression='bitpacking' - -statement ok -PRAGMA force_bitpacking_mode='for' - -statement ok -CREATE TABLE test_bitpacked AS SELECT i%2::HUGEINT AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -statement ok -PRAGMA force_compression='uncompressed' - -statement ok -CREATE TABLE test_uncompressed AS SELECT i::HUGEINT AS i FROM range(0, 120000000) tbl(i); - -statement ok -checkpoint - -query I -SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking'; ----- - -query I -SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed'; ----- - -statement ok -checkpoint - -query II -select (uncompressed::FLOAT / bitpacked::FLOAT) > 50, (uncompressed::FLOAT / bitpacked::FLOAT) < 60 FROM ( - select - (select count(distinct block_id) from pragma_storage_info('test_bitpacked') where segment_type not in('VARCHAR', 'VALIDITY')) as bitpacked, - (select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed -) ----- -True True - -statement ok -drop table test_bitpacked; -drop table test_uncompressed; - -statement ok -PRAGMA force_bitpacking_mode='none' - -# Assert that all supported types do in fact compress -foreach type hugeint - -statement ok -PRAGMA force_compression='uncompressed'; - -statement ok -CREATE TABLE test_uncompressed AS SELECT (i%2)::${type} FROM range(0, 2500000) tbl(i); - -statement ok -checkpoint - -statement ok -PRAGMA force_compression='bitpacking' - -statement ok -CREATE TABLE test_bitpacked AS SELECT (i%2)::${type} FROM range(0, 2500000) tbl(i); - -statement ok -checkpoint - -# assert compression ratio >2 wich should be achieved for even the smallest types for this data -query II -select (uncompressed::FLOAT / bitpacked::FLOAT) 
> 2, CAST(1 as ${type}) FROM ( - select - (select count(distinct block_id) from pragma_storage_info('test_bitpacked') where segment_type not in('VARCHAR', 'VALIDITY')) as bitpacked, - (select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed -) ----- -1 1 - -statement ok -drop table test_bitpacked - -statement ok -drop table test_uncompressed - -endloop \ No newline at end of file