Skip to content

Commit

Permalink
fix: use offset as a disambiguator (#106)
Browse files Browse the repository at this point in the history
Co-authored-by: Matthew <[email protected]>
  • Loading branch information
kevmo314 and friendlymatthew authored Feb 16, 2024
1 parent aa14851 commit 5ad5baa
Show file tree
Hide file tree
Showing 10 changed files with 331 additions and 170 deletions.
1 change: 1 addition & 0 deletions pkg/appendable/index_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
const CurrentVersion = 1

// DataHandler combines the methods of btree.DataParser, which it embeds,
// with the Synchronize and Format operations declared below.
type DataHandler interface {
	// Embedding btree.DataParser means every DataHandler also satisfies
	// btree.DataParser and can be passed wherever one is expected.
	btree.DataParser
	// Synchronize receives the index file f and the bytes df and reports
	// failure through the returned error.
	// NOTE(review): presumably df is the data file content that f should be
	// brought up to date with — confirm against the implementations.
	Synchronize(f *IndexFile, df []byte) error
	// Format returns the Format value associated with this handler.
	// NOTE(review): presumably identifies the data file format — confirm.
	Format() Format
}
Expand Down
55 changes: 29 additions & 26 deletions pkg/btree/bptree.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@ type BPTree struct {
tree ReadWriteSeekPager
meta MetaPage

Data []byte
Data []byte
DataParser DataParser
}

// NewBPTree returns a BPTree that uses tree for page storage and meta for
// root bookkeeping. Data and DataParser are left at their zero values.
func NewBPTree(tree ReadWriteSeekPager, meta MetaPage) *BPTree {
	t := new(BPTree)
	t.tree = tree
	t.meta = meta
	return t
}

// NewBPTreeWithData returns a BPTree like NewBPTree does, additionally
// setting the tree's Data to data and its DataParser to parser.
func NewBPTreeWithData(tree ReadWriteSeekPager, meta MetaPage, data []byte, parser DataParser) *BPTree {
	t := new(BPTree)
	t.tree = tree
	t.meta = meta
	t.Data = data
	t.DataParser = parser
	return t
}

func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) {
mp, err := t.meta.Root()
if err != nil || mp.Length == 0 {
Expand All @@ -38,26 +43,26 @@ func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) {
return root, mp, nil
}

func (t *BPTree) Find(key []byte) (MemoryPointer, bool, error) {
func (t *BPTree) Find(key ReferencedValue) (ReferencedValue, MemoryPointer, error) {
root, rootOffset, err := t.root()
if err != nil {
return MemoryPointer{}, false, fmt.Errorf("read root node: %w", err)
return ReferencedValue{}, MemoryPointer{}, fmt.Errorf("read root node: %w", err)
}
if root == nil {
return MemoryPointer{}, false, nil
return ReferencedValue{}, MemoryPointer{}, nil
}
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return MemoryPointer{}, false, err
return ReferencedValue{}, MemoryPointer{}, err
}
return path[0].node.Pointer(path[0].index), path[0].found, nil
return path[0].node.Keys[path[0].index], path[0].node.Pointer(path[0].index), nil
}

func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) {
if _, err := t.tree.Seek(int64(ptr.Offset), io.SeekStart); err != nil {
return nil, err
}
node := &BPTreeNode{Data: t.Data}
node := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
if _, err := node.ReadFrom(t.tree); err != nil {
return nil, err
}
Expand All @@ -67,27 +72,24 @@ func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) {
type TraversalRecord struct {
node *BPTreeNode
index int
found bool
// the offset is useful so we know which page to free when we split
ptr MemoryPointer
}

// traverse returns the path from root to leaf in reverse order (leaf first)
// the last element is always the node passed in
func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key (or gte if leaf)
index, found := slices.BinarySearchFunc(node.Keys, ReferencedValue{Value: key}, func(e ReferencedValue, t ReferencedValue) int {
if cmp := bytes.Compare(e.Value, t.Value); cmp == 0 && !node.leaf() {
return -1
} else {
return cmp
}
})
func (t *BPTree) traverse(key ReferencedValue, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key
index, found := slices.BinarySearchFunc(node.Keys, key, CompareReferencedValues)

if node.leaf() {
return []TraversalRecord{{node: node, index: index, found: found, ptr: ptr}}, nil
return []TraversalRecord{{node: node, index: index, ptr: ptr}}, nil
}

if found {
// if the key is found, we need to go to the right child
index++
}
child, err := t.readNode(node.Pointer(index))
if err != nil {
return nil, err
Expand All @@ -96,7 +98,7 @@ func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]Tr
if err != nil {
return nil, err
}
return append(path, TraversalRecord{node: node, index: index, found: found, ptr: ptr}), nil
return append(path, TraversalRecord{node: node, index: index, ptr: ptr}), nil
}

func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
Expand All @@ -106,7 +108,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
}
if root == nil {
// special case, create the root as the first node
node := &BPTreeNode{Data: t.Data}
node := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
node.Keys = []ReferencedValue{key}
node.leafPointers = []MemoryPointer{value}
buf, err := node.MarshalBinary()
Expand All @@ -120,16 +122,17 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
return t.meta.SetRoot(MemoryPointer{Offset: uint64(offset), Length: uint32(len(buf))})
}

path, err := t.traverse(key.Value, root, rootOffset)
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return err
}

// insert the key into the leaf
n := path[0].node
j, _ := slices.BinarySearchFunc(n.Keys, key, func(e ReferencedValue, t ReferencedValue) int {
return bytes.Compare(e.Value, t.Value)
})
j, found := slices.BinarySearchFunc(n.Keys, key, CompareReferencedValues)
if found {
return fmt.Errorf("key already exists")
}
if j == len(n.Keys) {
n.Keys = append(n.Keys, key)
n.leafPointers = append(n.leafPointers, value)
Expand All @@ -151,7 +154,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
midKey := n.Keys[mid]

// n is the left node, m the right node
m := &BPTreeNode{Data: t.Data}
m := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
if n.leaf() {
m.leafPointers = n.leafPointers[mid:]
m.Keys = n.Keys[mid:]
Expand Down Expand Up @@ -205,7 +208,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
// the parent will be written to disk in the next iteration
} else {
// the root split, so create a new root
p := &BPTreeNode{Data: t.Data}
p := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
p.Keys = []ReferencedValue{midKey}
p.internalPointers = []uint64{
uint64(noffset), uint64(moffset),
Expand Down
101 changes: 57 additions & 44 deletions pkg/btree/bptree_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btree

import (
"bytes"
"encoding/binary"
"math/rand"
"testing"
Expand Down Expand Up @@ -30,11 +31,11 @@ func TestBPTree(t *testing.T) {
}
tree := NewBPTree(p, &testMetaPage{})
// find a key that doesn't exist
_, found, err := tree.Find([]byte("hello"))
k, _, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if found {
if len(k.Value) != 0 {
t.Fatal("expected not found")
}
})
Expand All @@ -49,11 +50,11 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find([]byte("hello"))
k, v, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v.Offset != 1 {
Expand All @@ -74,21 +75,21 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("world")}, MemoryPointer{Offset: 2}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
Expand All @@ -115,41 +116,41 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("cooow")}, MemoryPointer{Offset: 4}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
t.Fatalf("expected value 2, got %d", v2)
}
v3, f3, err := tree.Find([]byte("moooo"))
k3, v3, err := tree.Find(ReferencedValue{Value: []byte("moooo")})
if err != nil {
t.Fatal(err)
}
if !f3 {
if !bytes.Equal(k3.Value, []byte("moooo")) {
t.Fatal("expected to find key")
}
if v3.Offset != 3 {
t.Fatalf("expected value 3, got %d", v3)
}
v4, f4, err := tree.Find([]byte("cooow"))
k4, v4, err := tree.Find(ReferencedValue{Value: []byte("cooow")})
if err != nil {
t.Fatal(err)
}
if !f4 {
if !bytes.Equal(k4.Value, []byte("cooow")) {
t.Fatal("expected to find key")
}
if v4.Offset != 4 {
Expand Down Expand Up @@ -180,37 +181,45 @@ func TestBPTree(t *testing.T) {
t.Fatal(err)
}
})
}

t.Run("insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
func TestBPTree_SequentialInsertionTest(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
v, found, err := tree.Find(buf)
if err != nil {
t.Fatal(err)
}
if !found {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
})
}
}

// StubDataParser is a test double whose Parse result does not depend on
// its input.
type StubDataParser struct{}

// Parse ignores value and returns a freshly allocated slice holding the
// bytes 1 through 8.
func (s *StubDataParser) Parse(value []byte) []byte {
	out := make([]byte, 8)
	for i := range out {
		out[i] = byte(i + 1)
	}
	return out
}

func TestBPTree_RandomTests(t *testing.T) {
t.Run("random insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
Expand All @@ -234,11 +243,11 @@ func TestBPTree(t *testing.T) {
if _, err := s.Read(buf); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find(buf)
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
Expand All @@ -253,9 +262,13 @@ func TestBPTree(t *testing.T) {
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
tree := NewBPTreeWithData(p, &testMetaPage{}, make([]byte, 65536*4+8), &StubDataParser{})
for i := 0; i < 65536*4; i++ {
if err := tree.Insert(ReferencedValue{Value: []byte{1, 2, 3, 4, 5, 6, 7, 8}}, MemoryPointer{Offset: uint64(i)}); err != nil {
if err := tree.Insert(ReferencedValue{
Value: []byte{1, 2, 3, 4, 5, 6, 7, 8},
// DataPointer is used as a disambiguator.
DataPointer: MemoryPointer{Offset: uint64(i), Length: 8},
}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/btree/multi.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ func (m *LinkedMetaPage) SetRoot(mp MemoryPointer) error {
//
// Generally, passing data is required, however if the tree
// consists of only inlined values, it is not necessary.
func (m *LinkedMetaPage) BPTree(data []byte) *BPTree {
func (m *LinkedMetaPage) BPTree(data []byte, parser DataParser) *BPTree {
	// The meta page acts as both the backing pager source (m.rws) and the
	// MetaPage for the new tree.
	t := NewBPTree(m.rws, m)
	if data == nil {
		// Without data, the parser is not attached either.
		return t
	}
	t.Data = data
	t.DataParser = parser
	return t
}
Expand Down
Loading

0 comments on commit 5ad5baa

Please sign in to comment.