Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split pointers into Leaf and Internal #93

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 76 additions & 46 deletions pkg/btree/bptree.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (t *BPTree) Find(key []byte) (MemoryPointer, bool, error) {
n := path[0].node
i, found := n.bsearch(key)
if found {
return n.Pointers[i], true, nil
return n.LeafPointers[i], true, nil
}
return MemoryPointer{}, false, nil
}
Expand Down Expand Up @@ -85,36 +85,54 @@ func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]Tr
return []TraversalRecord{{node: node, ptr: ptr}}, nil
}
for i, k := range node.Keys {
if bytes.Compare(key, k.Value) < 0 {
if node.Pointers[i].Offset == ptr.Offset {
log.Printf("infinite loop index %d", i)
log.Printf("%#v", node)
log.Printf("node offset %#v ptr offset %#v", node.Pointers[i].Offset, ptr.Offset)
if bytes.Compare(key, k.Value) < 0 || i == len(node.Keys)-1 {
var childPointer MemoryPointer

if len(node.InternalPointers) > 0 {
childPointer.Offset = node.InternalPointers[i]
childPointer.Length = pageSizeBytes
} else {
log.Printf("Internal node without internal pointers")
panic("infinite loop")
}
child, err := t.readNode(node.Pointers[i])

child, err := t.readNode(childPointer)
if err != nil {
return nil, err
}
path, err := t.traverse(key, child, node.Pointers[i])
path, err := t.traverse(key, child, childPointer)
if err != nil {
return nil, err
}
return append(path, TraversalRecord{node: node, index: i, ptr: ptr}), nil
}
}
if node.Pointers[len(node.Pointers)-1].Offset == ptr.Offset {
panic("infinite loop 2")
}
child, err := t.readNode(node.Pointers[len(node.Pointers)-1])
if err != nil {
return nil, err
}
path, err := t.traverse(key, child, node.Pointers[len(node.Pointers)-1])
if err != nil {
return nil, err

// handle right most child for a key greater than all keys
lastIndex := len(node.Keys) // since len(node.pointers) - 1 == len(node.keys) for inner
if len(node.InternalPointers) > lastIndex {
var lastChildPointer MemoryPointer = MemoryPointer{
Offset: node.InternalPointers[lastIndex],
Length: pageSizeBytes,
}

if lastChildPointer.Offset == ptr.Offset {
panic("infinite loop detected")
}

child, err := t.readNode(lastChildPointer)
if err != nil {
return nil, err
}
path, err := t.traverse(key, child, lastChildPointer)
if err != nil {
return nil, err
}
return append(path, TraversalRecord{node: node, index: lastIndex, ptr: ptr}), nil

}
return append(path, TraversalRecord{node: node, index: len(node.Keys), ptr: ptr}), nil
log.Printf("Internal node missing last pointer")
return nil, fmt.Errorf("internal node missing last pointer")
}

func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
Expand All @@ -126,7 +144,8 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
// special case, create the root as the first node
node := &BPTreeNode{Data: t.Data}
node.Keys = []ReferencedValue{key}
node.Pointers = []MemoryPointer{value}
node.LeafPointers = []MemoryPointer{value}

buf, err := node.MarshalBinary()
if err != nil {
return err
Expand All @@ -146,14 +165,19 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
// insert the key into the leaf
n := path[0].node
j, _ := n.bsearch(key.Value)

if !n.leaf() {
return fmt.Errorf("attempted to insert into a non-leaf node")
}

if j == len(n.Keys) {
n.Keys = append(n.Keys, key)
n.Pointers = append(n.Pointers, value)
n.LeafPointers = append(n.LeafPointers, value)
} else {
n.Keys = append(n.Keys[:j+1], n.Keys[j:]...)
n.Keys[j] = key
n.Pointers = append(n.Pointers[:j+1], n.Pointers[j:]...)
n.Pointers[j] = value
n.LeafPointers = append(n.LeafPointers[:j+1], n.LeafPointers[j:]...)
n.LeafPointers[j] = value
}

// traverse up the tree and split if necessary
Expand All @@ -169,34 +193,35 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
// n is the left node, m the right node
m := &BPTreeNode{Data: t.Data}
if n.leaf() {
m.Pointers = n.Pointers[mid:]
m.Keys = n.Keys[mid:]
m.LeafPointers = append([]MemoryPointer(nil), n.LeafPointers[mid:]...)
n.LeafPointers = n.LeafPointers[:mid] // Adjust the original node's LeafPointers
} else {
// for non-leaf nodes, the mid key is inserted into the parent
m.Pointers = n.Pointers[mid+1:]
m.Keys = n.Keys[mid+1:]
// Adjust for non-leaf nodes using InternalPointers
m.InternalPointers = append([]uint64(nil), n.InternalPointers[mid+1:]...) // Skip the middle key's pointer for the right node
n.InternalPointers = append([]uint64(nil), n.InternalPointers[:mid+1]...) // Include the middle key's pointer for the left node
}
n.Keys = append([]ReferencedValue(nil), n.Keys[mid+1:]...)
n.Keys = n.Keys[:mid]

mbuf, err := m.MarshalBinary()
if err != nil {
return err
}

fmt.Printf("mbuf %v", mbuf)

moffset, err := t.tree.NewPage(mbuf)
if err != nil {
return err
}

if n.leaf() {
n.Pointers = n.Pointers[:mid]
n.Keys = n.Keys[:mid]
} else {
n.Pointers = n.Pointers[:mid+1]
n.Keys = n.Keys[:mid]
}

nbuf, err := n.MarshalBinary()
if err != nil {
return err
}

fmt.Printf("nbuf: %v", nbuf)

noffset := tr.ptr.Offset
if _, err := t.tree.Seek(int64(noffset), io.SeekStart); err != nil {
return err
Expand All @@ -215,17 +240,22 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
p.node.Keys = append(p.node.Keys[:p.index+1], p.node.Keys[p.index:]...)
p.node.Keys[p.index] = midKey
}
p.node.Pointers = append(p.node.Pointers[:p.index+1], p.node.Pointers[p.index:]...)
p.node.Pointers[p.index] = MemoryPointer{Offset: uint64(noffset), Length: uint32(len(nbuf))}
p.node.Pointers[p.index+1] = MemoryPointer{Offset: uint64(moffset), Length: uint32(len(mbuf))}

if p.node.leaf() {
return fmt.Errorf("unexpected leaf node while trying to update parent")
}

p.node.InternalPointers = append(p.node.InternalPointers[:p.index+1], p.node.InternalPointers[p.index:]...)
p.node.InternalPointers[p.index] = uint64(noffset)
p.node.InternalPointers[p.index+1] = uint64(moffset)
// the parent will be written to disk in the next iteration
} else {
// the root split, so create a new root
p := &BPTreeNode{Data: t.Data}
p.Keys = []ReferencedValue{midKey}
p.Pointers = []MemoryPointer{
{Offset: uint64(noffset), Length: uint32(len(nbuf))},
{Offset: uint64(moffset), Length: uint32(len(mbuf))},
p.InternalPointers = []uint64{
uint64(noffset),
uint64(moffset),
}

pbuf, err := p.MarshalBinary()
Expand Down Expand Up @@ -353,21 +383,21 @@ func (t *BPTree) recursiveString(n *BPTreeNode, indent int) string {
// print the node itself
var buf bytes.Buffer
if !n.leaf() {
for i := range n.Pointers {
child, err := t.readNode(n.Pointers[i])
for i := range n.LeafPointers {
child, err := t.readNode(n.LeafPointers[i])
if err != nil {
return fmt.Sprintf("error: failed to read child node: %v", err)
}
buf.WriteString(t.recursiveString(child, indent+1))
if i < len(n.Pointers)-1 {
if i < len(n.LeafPointers)-1 {
for i := 0; i < indent; i++ {
buf.WriteString(" ")
}
buf.WriteString(fmt.Sprintf("key %v\n", n.Keys[i]))
}
}
} else {
for i := range n.Pointers {
for i := range n.InternalPointers {
for i := 0; i < indent; i++ {
buf.WriteString(" ")
}
Expand Down
73 changes: 58 additions & 15 deletions pkg/btree/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,27 @@ type BPTreeNode struct {
Data []byte
// contains the offset of the child node or the offset of the record for leaf
// if the node is a leaf, the last pointer is the offset of the next leaf
Pointers []MemoryPointer
Keys []ReferencedValue
LeafPointers []MemoryPointer
InternalPointers []uint64
Keys []ReferencedValue
}

// leaf reports whether the node is a leaf by comparing a pointer count
// against the number of keys.
//
// NOTE(review): this span comes from a diff view, so the two return lines
// below appear to be the removed (Pointers) and added (LeafPointers) sides of
// the same change rather than two live statements — confirm against the
// actual file before relying on either.
func (n *BPTreeNode) leaf() bool {
// leaves contain the same number of pointers as keys
return len(n.Pointers) == len(n.Keys)
return len(n.LeafPointers) == len(n.Keys)
}

// Pointers returns the node's pointers as MemoryPointer values.
//
// For a leaf node the LeafPointers slice is returned as-is (it is not
// copied). For any other node a new slice is built in which each
// InternalPointers offset is paired with a Length of pageSizeBytes.
func (n *BPTreeNode) Pointers() []MemoryPointer {
	if !n.leaf() {
		// Internal nodes store bare offsets; expand each into a full pointer.
		expanded := make([]MemoryPointer, 0, len(n.InternalPointers))
		for _, off := range n.InternalPointers {
			expanded = append(expanded, MemoryPointer{Offset: off, Length: pageSizeBytes})
		}
		return expanded
	}

	return n.LeafPointers
}

func (n *BPTreeNode) Size() int64 {
Expand All @@ -44,21 +58,33 @@ func (n *BPTreeNode) Size() int64 {
size += 4 + len(k.Value)
}
}
for range n.Pointers {
size += 12

if n.leaf() {
for range n.LeafPointers {
size += 12
}
} else {
for range n.InternalPointers {
size += 8
}
}

return int64(size)
}

func (n *BPTreeNode) MarshalBinary() ([]byte, error) {
fmt.Println("unmarshall")
size := int32(len(n.Keys))
buf := make([]byte, n.Size())
// set the first bit to 1 if it's a leaf
if n.leaf() {
binary.BigEndian.PutUint32(buf[:4], uint32(-size))

} else {
binary.BigEndian.PutUint32(buf[:4], uint32(size))
}

fmt.Printf("is leaf v, size %v", size)
if size == 0 {
panic("writing empty node")
}
Expand All @@ -78,10 +104,18 @@ func (n *BPTreeNode) MarshalBinary() ([]byte, error) {
ct += m + 4
}
}
for _, p := range n.Pointers {
binary.BigEndian.PutUint64(buf[ct:ct+8], p.Offset)
binary.BigEndian.PutUint32(buf[ct+8:ct+12], p.Length)
ct += 12
if n.leaf() {
for _, p := range n.LeafPointers {
binary.BigEndian.PutUint64(buf[ct:ct+8], p.Offset)
binary.BigEndian.PutUint32(buf[ct+8:ct+12], p.Length)
ct += 12
}
} else {
for _, offset := range n.InternalPointers {
binary.BigEndian.PutUint64(buf[ct:ct+8], offset)
// For internal pointers, we do not store Length, as these pointers refer to whole pages.
ct += 8
}
}
if ct != int(n.Size()) {
panic("size mismatch")
Expand All @@ -102,10 +136,10 @@ func (n *BPTreeNode) UnmarshalBinary(buf []byte) error {
size := int32(binary.BigEndian.Uint32(buf[:4]))
leaf := size < 0
if leaf {
n.Pointers = make([]MemoryPointer, -size)
n.LeafPointers = make([]MemoryPointer, -size)
n.Keys = make([]ReferencedValue, -size)
} else {
n.Pointers = make([]MemoryPointer, size+1)
n.InternalPointers = make([]uint64, size+1)
n.Keys = make([]ReferencedValue, size)
}
if size == 0 {
Expand All @@ -127,11 +161,20 @@ func (n *BPTreeNode) UnmarshalBinary(buf []byte) error {
m += 4 + int(l)
}
}
for i := range n.Pointers {
n.Pointers[i].Offset = binary.BigEndian.Uint64(buf[m : m+8])
n.Pointers[i].Length = binary.BigEndian.Uint32(buf[m+8 : m+12])
m += 12

if leaf {
for i := range n.LeafPointers {
n.LeafPointers[i].Offset = binary.BigEndian.Uint64(buf[m : m+8])
n.LeafPointers[i].Length = binary.BigEndian.Uint32(buf[m+8 : m+12])
m += 12
}
} else {
for i := range n.InternalPointers {
n.InternalPointers[i] = binary.BigEndian.Uint64(buf[m : m+8])
m += 8
}
}

return nil
}

Expand Down
9 changes: 3 additions & 6 deletions pkg/btree/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
func TestBPTreeNode_ReadWriteLeaf(t *testing.T) {
// Create a test BPTreeNode
node1 := &BPTreeNode{
Pointers: []MemoryPointer{
LeafPointers: []MemoryPointer{
{Offset: 0, Length: 1},
{Offset: 1, Length: 2},
{Offset: 2, Length: 3},
Expand Down Expand Up @@ -43,11 +43,8 @@ func TestBPTreeNode_ReadWriteLeaf(t *testing.T) {
func TestBPTreeNode_ReadWriteIntermediate(t *testing.T) {
// Create a test BPTreeNode
node1 := &BPTreeNode{
Pointers: []MemoryPointer{
{Offset: 0, Length: 1},
{Offset: 1, Length: 2},
{Offset: 2, Length: 3},
{Offset: 3, Length: 4},
InternalPointers: []uint64{
0, 1, 2, 3,
},
Keys: []ReferencedValue{
{Value: []byte{0}},
Expand Down
Loading