Skip to content

Commit

Permalink
Extract byteseeker into its own package
Browse files Browse the repository at this point in the history
  • Loading branch information
ucirello committed Jan 17, 2024
1 parent 5eb1846 commit 86d9beb
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 57 deletions.
56 changes: 4 additions & 52 deletions filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,11 @@ package splitblockbloom

import (
"bytes"
"errors"
"fmt"
"io"
"testing"
)

// ByteSliceReadSeeker adapts an in-memory byte slice to io.ReadSeeker by
// tracking a read cursor into the slice.
type ByteSliceReadSeeker struct {
	slice  []byte // data served by Read
	offset int64  // index in slice of the next byte Read hands out
}

// NewByteSliceReadSeeker returns a ByteSliceReadSeeker over slice with its
// cursor at the start. The slice is referenced, not copied.
func NewByteSliceReadSeeker(slice []byte) *ByteSliceReadSeeker {
	rs := &ByteSliceReadSeeker{}
	rs.slice = slice
	return rs
}

// Read copies bytes from the cursor onward into p and advances the cursor by
// the number of bytes copied. It returns 0 and io.EOF once the cursor has
// reached the end of the slice.
func (r *ByteSliceReadSeeker) Read(p []byte) (int, error) {
	if size := int64(len(r.slice)); r.offset < size {
		copied := copy(p, r.slice[r.offset:])
		r.offset += int64(copied)
		return copied, nil
	}
	return 0, io.EOF // end of slice
}

// Seek moves the cursor to offset, interpreted relative to the start, the
// current cursor, or the end of the slice according to whence. The resulting
// position must lie in [0, len(slice)]; otherwise an error is returned and the
// cursor is left where it was.
func (r *ByteSliceReadSeeker) Seek(offset int64, whence int) (int64, error) {
	size := int64(len(r.slice))

	// Resolve the reference point first; the target is always base + offset.
	var base int64
	switch whence {
	case io.SeekStart:
		base = 0
	case io.SeekCurrent:
		base = r.offset
	case io.SeekEnd:
		base = size
	default:
		return 0, errors.New("invalid whence")
	}

	target := base + offset
	switch {
	case target < 0:
		return 0, errors.New("negative position")
	case target > size:
		return 0, errors.New("position out of bounds")
	}

	r.offset = target
	return target, nil
}
"github.com/axiomhq/splitblockbloom/internal/byteseeker"
)

func TestFilter(t *testing.T) {
bb := NewFilter(1e6, 0.004)
Expand Down Expand Up @@ -90,7 +42,7 @@ func TestFilter(t *testing.T) {
t.Log("wrote:", n, "bytes", "len:", len(b))

for i := 0; i < 1e6; i++ {
ok, err := ContainsFromStream(NewByteSliceReadSeeker(b), len(bb), []byte(fmt.Sprintf("val%d", i)))
ok, err := ContainsFromStream(&byteseeker.Buffer{B: b}, len(bb), []byte(fmt.Sprintf("val%d", i)))
if err != nil {
t.Fatal(err)
}
Expand All @@ -101,7 +53,7 @@ func TestFilter(t *testing.T) {

errs = 0
for i := int(1e6); i < 10e6; i++ {
ok, err := ContainsFromStream(NewByteSliceReadSeeker(b), len(bb), []byte(fmt.Sprintf("val%d", i)))
ok, err := ContainsFromStream(&byteseeker.Buffer{B: b}, len(bb), []byte(fmt.Sprintf("val%d", i)))
if err != nil {
t.Fatal(err)
}
Expand Down
51 changes: 51 additions & 0 deletions internal/byteseeker/byteseeker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package byteseeker

import (
"errors"
"io"
)

// Compile-time check that *Buffer satisfies io.ReadSeeker.
var _ io.ReadSeeker = (*Buffer)(nil)

// Buffer is an io.ReadSeeker backed by the byte slice B. The zero value is
// an empty buffer positioned at the start; populate B to supply data.
type Buffer struct {
	// B holds the bytes served by Read. It is referenced, not copied.
	B []byte
	// offset is the index in B of the next byte Read hands out.
	offset int64
}

// Read copies bytes from the current position onward into p and advances the
// position by the number of bytes copied. It returns 0 and io.EOF once the
// position has reached the end of B.
func (r *Buffer) Read(p []byte) (int, error) {
	if size := int64(len(r.B)); r.offset < size {
		copied := copy(p, r.B[r.offset:])
		r.offset += int64(copied)
		return copied, nil
	}
	return 0, io.EOF // end of slice
}

// Seek repositions the buffer to offset, interpreted relative to the start,
// the current position, or the end of B according to whence. The resulting
// position must lie in [0, len(B)]; otherwise an error is returned and the
// position is left unchanged.
func (r *Buffer) Seek(offset int64, whence int) (int64, error) {
	size := int64(len(r.B))

	// Resolve the reference point first; the target is always base + offset.
	var base int64
	switch whence {
	case io.SeekStart:
		base = 0
	case io.SeekCurrent:
		base = r.offset
	case io.SeekEnd:
		base = size
	default:
		return 0, errors.New("invalid whence")
	}

	target := base + offset
	switch {
	case target < 0:
		return 0, errors.New("negative position")
	case target > size:
		return 0, errors.New("position out of bounds")
	}

	r.offset = target
	return target, nil
}
12 changes: 7 additions & 5 deletions utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ const (
blockSeed = 0x5c6bfb31
)

func hash(val []byte, seed uint64) uint64 { return fnv1a.AddBytes64(seed, val) }
// hash returns the 64-bit digest produced by fnv1a.AddBytes64 when val is
// folded into the running state seed.
// NOTE(review): presumably FNV-1a, going by the package name; confirm against
// the fnv1a package this file imports.
func hash(val []byte, seed uint64) uint64 {
	digest := fnv1a.AddBytes64(seed, val)
	return digest
}

func calcFpp(ndv, bytes float64) float64 {
func calcFalsePositiveRatio(ndv, bytes float64) float64 {
if ndv == 0 || bytes <= 0 || ndv/(bytes*8) > 3 {
return 1.0
}
Expand All @@ -41,9 +43,9 @@ func calcFpp(ndv, bytes float64) float64 {
return math.Min(result, 1.0)
}

func blockBytesNeeded(ndv, fpp float64) uint64 {
func blockBytesNeeded(ndv, desiredFalsePositiveRatio float64) uint64 {
result := 1.0
for calcFpp(ndv, result) > fpp {
for calcFalsePositiveRatio(ndv, result) > desiredFalsePositiveRatio {
result *= 2
}
if result <= blockSizeInBytes {
Expand All @@ -53,7 +55,7 @@ func blockBytesNeeded(ndv, fpp float64) uint64 {
lo, hi := 0.0, result
for lo < hi-1 {
mid := lo + (hi-lo)/2
if calcFpp(ndv, mid) < fpp {
if calcFalsePositiveRatio(ndv, mid) < desiredFalsePositiveRatio {
hi = mid
} else {
lo = mid
Expand Down

0 comments on commit 86d9beb

Please sign in to comment.