From 86d9beb26e38f40b0b1314aa7dd36772142c9178 Mon Sep 17 00:00:00 2001 From: Ulderico Cirello Date: Wed, 17 Jan 2024 11:39:07 -0800 Subject: [PATCH] Extract byteseeker into its own package --- filter_test.go | 56 +++---------------------------- internal/byteseeker/byteseeker.go | 51 ++++++++++++++++++++++++++++ utils.go | 12 ++++--- 3 files changed, 62 insertions(+), 57 deletions(-) create mode 100644 internal/byteseeker/byteseeker.go diff --git a/filter_test.go b/filter_test.go index 222f62b..688c109 100644 --- a/filter_test.go +++ b/filter_test.go @@ -2,59 +2,11 @@ package splitblockbloom import ( "bytes" - "errors" "fmt" - "io" "testing" -) - -// ByteSliceReadSeeker implements the io.ReadSeeker interface for a byte slice. -type ByteSliceReadSeeker struct { - slice []byte - offset int64 -} - -// NewByteSliceReadSeeker creates a new ByteSliceReadSeeker. -func NewByteSliceReadSeeker(slice []byte) *ByteSliceReadSeeker { - return &ByteSliceReadSeeker{slice: slice, offset: 0} -} - -// Read implements the Read method of the io.ReadSeeker interface. -func (r *ByteSliceReadSeeker) Read(p []byte) (int, error) { - if r.offset >= int64(len(r.slice)) { - return 0, io.EOF // end of slice - } - - n := copy(p, r.slice[r.offset:]) - r.offset += int64(n) - return n, nil -} -// Seek implements the Seek method of the io.ReadSeeker interface. -func (r *ByteSliceReadSeeker) Seek(offset int64, whence int) (int64, error) { - var newOffset int64 - switch whence { - case io.SeekStart: - newOffset = offset - case io.SeekCurrent: - newOffset = r.offset + offset - case io.SeekEnd: - newOffset = int64(len(r.slice)) + offset - default: - return 0, errors.New("invalid whence") - } - - if newOffset < 0 { - return 0, errors.New("negative position") - } - - if newOffset > int64(len(r.slice)) { - return 0, errors.New("position out of bounds") - } - - r.offset = newOffset - return newOffset, nil -} + "github.com/axiomhq/splitblockbloom/internal/byteseeker" +) func TestFilter(t *testing.T) { bb := NewFilter(1e6, 0.004) @@ -90,7 +42,7 @@ func TestFilter(t *testing.T) { t.Log("wrote:", n, "bytes", "len:", len(b)) for i := 0; i < 1e6; i++ { - ok, err := ContainsFromStream(NewByteSliceReadSeeker(b), len(bb), []byte(fmt.Sprintf("val%d", i))) + ok, err := ContainsFromStream(&byteseeker.Buffer{B: b}, len(bb), []byte(fmt.Sprintf("val%d", i))) if err != nil { t.Fatal(err) } @@ -101,7 +53,7 @@ func TestFilter(t *testing.T) { errs = 0 for i := int(1e6); i < 10e6; i++ { - ok, err := ContainsFromStream(NewByteSliceReadSeeker(b), len(bb), []byte(fmt.Sprintf("val%d", i))) + ok, err := ContainsFromStream(&byteseeker.Buffer{B: b}, len(bb), []byte(fmt.Sprintf("val%d", i))) if err != nil { t.Fatal(err) } diff --git a/internal/byteseeker/byteseeker.go b/internal/byteseeker/byteseeker.go new file mode 100644 index 0000000..a90f775 --- /dev/null +++ b/internal/byteseeker/byteseeker.go @@ -0,0 +1,51 @@ +package byteseeker + +import ( + "errors" + "io" +) + +var _ io.ReadSeeker = (*Buffer)(nil) + +// Buffer implements the io.ReadSeeker interface for a byte slice. +type Buffer struct { + B []byte + offset int64 +} + +// Read implements the Read method of the io.ReadSeeker interface. +func (r *Buffer) Read(p []byte) (int, error) { + if r.offset >= int64(len(r.B)) { + return 0, io.EOF // end of slice + } + + n := copy(p, r.B[r.offset:]) + r.offset += int64(n) + return n, nil +} + +// Seek implements the Seek method of the io.ReadSeeker interface. +func (r *Buffer) Seek(offset int64, whence int) (int64, error) { + var newOffset int64 + switch whence { + case io.SeekStart: + newOffset = offset + case io.SeekCurrent: + newOffset = r.offset + offset + case io.SeekEnd: + newOffset = int64(len(r.B)) + offset + default: + return 0, errors.New("invalid whence") + } + + if newOffset < 0 { + return 0, errors.New("negative position") + } + + if newOffset > int64(len(r.B)) { + return 0, errors.New("position out of bounds") + } + + r.offset = newOffset + return newOffset, nil +} diff --git a/utils.go b/utils.go index 3ae766c..e9df834 100644 --- a/utils.go +++ b/utils.go @@ -16,9 +16,11 @@ const ( blockSeed = 0x5c6bfb31 ) -func hash(val []byte, seed uint64) uint64 { return fnv1a.AddBytes64(seed, val) } +func hash(val []byte, seed uint64) uint64 { + return fnv1a.AddBytes64(seed, val) +} -func calcFpp(ndv, bytes float64) float64 { +func calcFalsePositiveRatio(ndv, bytes float64) float64 { if ndv == 0 || bytes <= 0 || ndv/(bytes*8) > 3 { return 1.0 } @@ -41,9 +43,9 @@ func calcFpp(ndv, bytes float64) float64 { return math.Min(result, 1.0) } -func blockBytesNeeded(ndv, fpp float64) uint64 { +func blockBytesNeeded(ndv, desiredFalsePositiveRatio float64) uint64 { result := 1.0 - for calcFpp(ndv, result) > fpp { + for calcFalsePositiveRatio(ndv, result) > desiredFalsePositiveRatio { result *= 2 } if result <= blockSizeInBytes { @@ -53,7 +55,7 @@ func blockBytesNeeded(ndv, fpp float64) uint64 { lo, hi := 0.0, result for lo < hi-1 { mid := lo + (hi-lo)/2 - if calcFpp(ndv, mid) < fpp { + if calcFalsePositiveRatio(ndv, mid) < desiredFalsePositiveRatio { hi = mid } else { lo = mid