From a056be9cb7b2d96fcf8ef52e29c02e3c171ee07c Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 1 Jan 2024 23:57:22 -0500 Subject: [PATCH] feat: update node for B+ tree behavior --- pkg/btree/node.go | 140 +++++++++++++++++------------------ pkg/btree/node_test.go | 164 ----------------------------------------- 2 files changed, 69 insertions(+), 235 deletions(-) delete mode 100644 pkg/btree/node_test.go diff --git a/pkg/btree/node.go b/pkg/btree/node.go index 9560f274..49fb13da 100644 --- a/pkg/btree/node.go +++ b/pkg/btree/node.go @@ -1,111 +1,109 @@ package btree import ( + "bytes" + "encoding/binary" "io" - - "github.com/kevmo314/appendable/pkg/encoding" ) -type Node struct { - Keys []DataPointer - Children []uint64 - Leaf bool +// MemoryPointer is a uint64 offset and uint32 length +type MemoryPointer struct { + Offset uint64 + Length uint32 } -type DataPointer struct { - RecordOffset uint64 - FieldOffset, Length uint32 +type BPTreeNode struct { + // contains the offset of the child node or the offset of the record for a leaf + // if the node is a leaf, the last pointer is the offset of the next leaf + Pointers []MemoryPointer + Keys [][]byte } -func (p DataPointer) Value(r io.ReadSeeker) ([]byte, error) { - buf := make([]byte, p.Length) - if _, err := r.Seek(int64(p.RecordOffset+uint64(p.FieldOffset)), io.SeekStart); err != nil { - return nil, err - } - n, err := r.Read(buf) - if err != nil { - return nil, err - } - if n != int(p.Length) { - return nil, io.ErrUnexpectedEOF - } - return buf, nil +func (n *BPTreeNode) leaf() bool { + // leaves contain the same number of pointers as keys + return len(n.Pointers) == len(n.Keys) } -func (n *Node) WriteTo(w io.Writer) (int64, error) { - size := len(n.Keys) - if n.Leaf { - // mark the first bit +func (n *BPTreeNode) WriteTo(w io.Writer) (int64, error) { + size := byte(len(n.Keys)) + // set the high bit (bit 7) to 1 if it's a leaf + if n.leaf() { size |= 1 << 7 } - if err := encoding.WriteUint8(w, uint8(size)); err != nil { +
if err := binary.Write(w, binary.BigEndian, size); err != nil { return 0, err } - for _, key := range n.Keys { - if err := encoding.WriteUint64(w, key.RecordOffset); err != nil { + ct := 1 + for _, k := range n.Keys { + if err := binary.Write(w, binary.BigEndian, uint32(len(k))); err != nil { return 0, err } - if err := encoding.WriteUint32(w, key.FieldOffset); err != nil { + m, err := w.Write(k) + if err != nil { return 0, err } - if err := encoding.WriteUint32(w, key.Length); err != nil { + ct += m + 4 + } + for _, p := range n.Pointers { + if err := binary.Write(w, binary.BigEndian, p.Offset); err != nil { return 0, err } - } - if !n.Leaf { - for _, child := range n.Children { - if err := encoding.WriteUint64(w, child); err != nil { - return 0, err - } + if err := binary.Write(w, binary.BigEndian, p.Length); err != nil { + return 0, err } + ct += 12 } - return int64(1 + 16*len(n.Keys) + 8*len(n.Children)), nil + return int64(ct), nil } -func (n *Node) ReadFrom(r io.Reader) (int64, error) { - size, err := encoding.ReadUint8(r) - if err != nil { +func (n *BPTreeNode) ReadFrom(r io.Reader) (int64, error) { + var size byte + if err := binary.Read(r, binary.BigEndian, &size); err != nil { return 0, err } - n.Leaf = size&(1<<7) != 0 - size = size & (1<<7 - 1) - n.Keys = make([]DataPointer, size) - for i := 0; i < int(size); i++ { - recordOffset, err := encoding.ReadUint64(r) - if err != nil { + leaf := size&(1<<7) != 0 + if leaf { + n.Pointers = make([]MemoryPointer, size&0x7f) + } else { + n.Pointers = make([]MemoryPointer, (size&0x7f)+1) + } + n.Keys = make([][]byte, size&0x7f) + m := 1 + for i := range n.Keys { + var l uint32 + if err := binary.Read(r, binary.BigEndian, &l); err != nil { return 0, err } - fieldOffset, err := encoding.ReadUint32(r) - if err != nil { + n.Keys[i] = make([]byte, l) + if _, err := io.ReadFull(r, n.Keys[i]); err != nil { return 0, err } - length, err := encoding.ReadUint32(r) - if err != nil { + m += 4 + int(l) + } + for i := range 
n.Pointers { + if err := binary.Read(r, binary.BigEndian, &n.Pointers[i].Offset); err != nil { return 0, err } - n.Keys[i] = DataPointer{ - RecordOffset: recordOffset, - FieldOffset: fieldOffset, - Length: length, - } - } - if !n.Leaf { - n.Children = make([]uint64, size+1) - for i := 0; i <= int(size); i++ { - child, err := encoding.ReadUint64(r) - if err != nil { - return 0, err - } - n.Children[i] = child + if err := binary.Read(r, binary.BigEndian, &n.Pointers[i].Length); err != nil { + return 0, err } + m += 12 } - return 1 + 16*int64(size) + 8*int64(size+1), nil + return int64(m), nil } -func (n *Node) Clone() *Node { - return &Node{ - Keys: n.Keys[:], - Children: n.Children[:], - Leaf: n.Leaf, +func (n *BPTreeNode) bsearch(key []byte) (int, bool) { + i, j := 0, len(n.Keys)-1 + for i <= j { + m := (i + j) / 2 + cmp := bytes.Compare(key, n.Keys[m]) + if cmp == 0 { + return m, true + } else if cmp < 0 { + j = m - 1 + } else { + i = m + 1 + } } + return i, false } diff --git a/pkg/btree/node_test.go b/pkg/btree/node_test.go deleted file mode 100644 index 3e3eb34c..00000000 --- a/pkg/btree/node_test.go +++ /dev/null @@ -1,164 +0,0 @@ -package btree - -import ( - "bytes" - "reflect" - "testing" -) - -func TestNode(t *testing.T) { - t.Run("encode leaf", func(t *testing.T) { - n := &Node{ - Keys: []DataPointer{ - { - RecordOffset: 0, - FieldOffset: 0, - Length: 5, - }, - { - RecordOffset: 0, - FieldOffset: 5, - Length: 5, - }, - }, - Leaf: true, - } - buf := &bytes.Buffer{} - if _, err := n.WriteTo(buf); err != nil { - t.Fatal(err) - } - if buf.Len() != 1+16*2 { - t.Fatalf("expected buffer length to be 1+16*2+8*3, got %d", buf.Len()) - } - }) - - t.Run("encode leaf ignores children", func(t *testing.T) { - n := &Node{ - Keys: []DataPointer{ - { - RecordOffset: 0, - FieldOffset: 0, - Length: 5, - }, - { - RecordOffset: 0, - FieldOffset: 5, - Length: 5, - }, - }, - Leaf: true, - Children: []uint64{1, 2, 3}, - } - buf := &bytes.Buffer{} - if _, err := n.WriteTo(buf); 
err != nil { - t.Fatal(err) - } - if buf.Len() != 1+16*2 { - t.Fatalf("expected buffer length to be 1+16*2+8*3, got %d", buf.Len()) - } - }) - - t.Run("encode non-leaf", func(t *testing.T) { - n := &Node{ - Keys: []DataPointer{ - { - RecordOffset: 0, - FieldOffset: 0, - Length: 5, - }, - { - RecordOffset: 0, - FieldOffset: 5, - Length: 5, - }, - }, - Leaf: false, - Children: []uint64{1, 2, 3}, - } - buf := &bytes.Buffer{} - if _, err := n.WriteTo(buf); err != nil { - t.Fatal(err) - } - if buf.Len() != 1+16*2+8*3 { - t.Fatalf("expected buffer length to be 1+16*2+8*3, got %d", buf.Len()) - } - }) - - t.Run("decode leaf", func(t *testing.T) { - n := &Node{ - Keys: []DataPointer{ - { - RecordOffset: 0, - FieldOffset: 0, - Length: 5, - }, - { - RecordOffset: 0, - FieldOffset: 5, - Length: 5, - }, - }, - Leaf: true, - } - buf := &bytes.Buffer{} - if _, err := n.WriteTo(buf); err != nil { - t.Fatal(err) - } - m := &Node{} - if _, err := m.ReadFrom(buf); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(n, m) { - t.Fatalf("expected decoded node to be equal to original node, got %#v want %#v", m, n) - } - }) - - t.Run("decode non-leaf", func(t *testing.T) { - n := &Node{ - Keys: []DataPointer{ - { - RecordOffset: 0, - FieldOffset: 0, - Length: 5, - }, - { - RecordOffset: 0, - FieldOffset: 5, - Length: 5, - }, - }, - Leaf: false, - Children: []uint64{1, 2, 3}, - } - buf := &bytes.Buffer{} - if _, err := n.WriteTo(buf); err != nil { - t.Fatal(err) - } - m := &Node{} - if _, err := m.ReadFrom(buf); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(n, m) { - t.Fatalf("expected decoded node to be equal to original node, got %#v want %#v", m, n) - } - }) -} - -func TestDataPointer(t *testing.T) { - buf := newSeekableBuffer() - if _, err := buf.Write([]byte("moocowslmao")); err != nil { - t.Fatal(err) - } - p := DataPointer{ - RecordOffset: 1, - FieldOffset: 2, - Length: 5, - } - b, err := p.Value(buf) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(b, 
[]byte("cowsl")) { - t.Fatalf("expected value to be ocows, got %s", string(b)) - } -}