From d5e25376382f3211b5a0887e84e859e12b0fdcd6 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sat, 20 Jan 2024 16:30:39 -0500 Subject: [PATCH 1/3] feat: add pagefile api (#47) --- pkg/btree/pagefile.go | 31 +++++++++++++++++++ pkg/btree/pagefile_test.go | 62 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 pkg/btree/pagefile.go create mode 100644 pkg/btree/pagefile_test.go diff --git a/pkg/btree/pagefile.go b/pkg/btree/pagefile.go new file mode 100644 index 00000000..24b0bb55 --- /dev/null +++ b/pkg/btree/pagefile.go @@ -0,0 +1,31 @@ +package btree + +import "io" + +type PageFile struct { + io.ReadWriteSeeker + PageSize int +} + +func (pf *PageFile) Seek(offset int64, whence int) (int64, error) { + if offset == 0 && whence == io.SeekEnd { + // Seek to the end of the file + offset, err := pf.ReadWriteSeeker.Seek(0, io.SeekEnd) + if err != nil { + return 0, err + } + // If the offset is not a multiple of the page size, we need to pad the file + // with zeros to the next page boundary. 
+ if pf.PageSize > 0 && offset%int64(pf.PageSize) != 0 { + // Calculate the number of bytes to pad + pad := int64(pf.PageSize) - (offset % int64(pf.PageSize)) + // Write the padding + if _, err := pf.Write(make([]byte, pad)); err != nil { + return 0, err + } + return offset + pad, nil + } + return offset, nil + } + return pf.ReadWriteSeeker.Seek(offset, whence) +} diff --git a/pkg/btree/pagefile_test.go b/pkg/btree/pagefile_test.go new file mode 100644 index 00000000..b2a010d8 --- /dev/null +++ b/pkg/btree/pagefile_test.go @@ -0,0 +1,62 @@ +package btree + +import ( + "io" + "testing" +) + +func TestPageFile(t *testing.T) { + t.Run("no page size behaves like regular ReadWriteSeeker", func(t *testing.T) { + buf := newSeekableBuffer() + pf := &PageFile{ + ReadWriteSeeker: buf, + } + if _, err := pf.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) + } + if _, err := pf.Write([]byte("hello")); err != nil { + t.Fatal(err) + } + if _, err := pf.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) + } + b := make([]byte, 5) + if _, err := pf.Read(b); err != nil { + t.Fatal(err) + } + if string(b) != "hello" { + t.Fatalf("expected %q, got %q", "hello", string(b)) + } + }) + + t.Run("page size allocates pages on seek end", func(t *testing.T) { + buf := newSeekableBuffer() + pf := &PageFile{ + ReadWriteSeeker: buf, + PageSize: 16, + } + if _, err := pf.Seek(0, io.SeekEnd); err != nil { + t.Fatal(err) + } + if _, err := pf.Write([]byte("hello")); err != nil { + t.Fatal(err) + } + if _, err := pf.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) + } + b := make([]byte, 5) + if _, err := pf.Read(b); err != nil { + t.Fatal(err) + } + if string(b) != "hello" { + t.Fatalf("expected %q, got %q", "hello", string(b)) + } + n, err := pf.Seek(0, io.SeekEnd) + if err != nil { + t.Fatal(err) + } + if n != 16 { + t.Fatalf("expected %d, got %d", 16, n) + } + }) +} From 9773df77eeec86c8fcd441e932febc03b6ec8c60 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sun, 21 Jan 2024 13:45:57 -0500 
Subject: [PATCH 2/3] feat: use queue for free pages (#49) --- pkg/btree/README.md | 18 ++++++ pkg/btree/pagefile.go | 119 +++++++++++++++++++++++++++++++------ pkg/btree/pagefile_test.go | 114 +++++++++++++++++++++++++++-------- 3 files changed, 210 insertions(+), 41 deletions(-) diff --git a/pkg/btree/README.md b/pkg/btree/README.md index 5612b781..4a895418 100644 --- a/pkg/btree/README.md +++ b/pkg/btree/README.md @@ -2,3 +2,21 @@ This package implements an on-disk B+ tree, taking some inspiration from https://github.com/spy16/kiwi/tree/master/index/bptree. + +## On the significance of the 4kB page size + +The B+ tree is designed to be stored on disk, and as such, it is designed to +take advantage of the 4kB page size of most disks. However, in practice we +don't see a material impact on performance when using alternative sizes. So +why do we choose to use 4kB pages? + +In order to garbage collect old B+ tree nodes, we want to have pointers to +freed pages to deallocate them entirely. That is, if we did not use fixed-size pages +and stored nodes contiguously, it would be difficult to garbage collect the exact +number of bytes and we would end up with fragmentation. By using fixed-size pages, we +can simply store a list of freed pages, deallocate them entirely, and +be sure that any freed page is large enough to store a new node. + +Therefore, we must choose a page size that is large enough to store a node. +In practice, the choice of 4kB specifically is arbitrary, but it is a nice way +to align with the page size of most disks. 
diff --git a/pkg/btree/pagefile.go b/pkg/btree/pagefile.go index 24b0bb55..271b4469 100644 --- a/pkg/btree/pagefile.go +++ b/pkg/btree/pagefile.go @@ -1,31 +1,116 @@ package btree -import "io" +import ( + "encoding/binary" + "errors" + "io" + "log" +) + +type ReadWritePager interface { + io.ReadWriter + + NewPage() (int64, error) + FreePage(int64) error +} type PageFile struct { io.ReadWriteSeeker PageSize int + + // local cache of free pages to avoid reading from disk too often. + freePageIndexes [512]int64 } -func (pf *PageFile) Seek(offset int64, whence int) (int64, error) { - if offset == 0 && whence == io.SeekEnd { - // Seek to the end of the file - offset, err := pf.ReadWriteSeeker.Seek(0, io.SeekEnd) - if err != nil { - return 0, err +const maxFreePageIndices = 512 +const pageSizeBytes = 4096 // 4kB by default. + +func NewPageFile(rws io.ReadWriteSeeker) (*PageFile, error) { + // check if the rws is empty. if it is, allocate one page for the free page indexes + // if it is not, read the free page indexes from the last page + if _, err := rws.Seek(0, io.SeekStart); err != nil { + return nil, err + } + buf := make([]byte, pageSizeBytes) + _, err := rws.Read(buf) + if err != nil && err != io.EOF { + return nil, err + } + pf := &PageFile{ + ReadWriteSeeker: rws, + PageSize: pageSizeBytes, + } + if err == io.EOF { + // allocate one page for the free page indexes + if _, err := rws.Write(make([]byte, pageSizeBytes)); err != nil { + return nil, err + } + } else { + for i := 0; i < len(pf.freePageIndexes); i++ { + pf.freePageIndexes[i] = int64(binary.BigEndian.Uint64(buf[i*8 : (i+1)*8])) } - // If the offset is not a multiple of the page size, we need to pad the file - // with zeros to the next page boundary. 
- if pf.PageSize > 0 && offset%int64(pf.PageSize) != 0 { - // Calculate the number of bytes to pad - pad := int64(pf.PageSize) - (offset % int64(pf.PageSize)) - // Write the padding - if _, err := pf.Write(make([]byte, pad)); err != nil { + } + return pf, nil +} + +func (pf *PageFile) NewPage() (int64, error) { + // if there are free pages, return the first one + for i := 0; i < len(pf.freePageIndexes); i++ { + if pf.freePageIndexes[i] != 0 { + log.Printf("found free page at index %d", i) + offset := pf.freePageIndexes[i] + // zero out this free page index on disk + if _, err := pf.ReadWriteSeeker.Seek(int64(i*8), io.SeekStart); err != nil { + return 0, err + } + if _, err := pf.ReadWriteSeeker.Write(make([]byte, 8)); err != nil { return 0, err } - return offset + pad, nil + // seek to the free page + if _, err := pf.ReadWriteSeeker.Seek(offset, io.SeekStart); err != nil { + return 0, err + } + return offset, nil + } + } + + // seek to the end of the file + offset, err := pf.ReadWriteSeeker.Seek(0, io.SeekEnd) + if err != nil { + return 0, err + } + // if the offset is not a multiple of the page size, we need to pad the file + // with zeros to the next page boundary. 
+ if pf.PageSize > 0 && offset%int64(pf.PageSize) != 0 { + // Calculate the number of bytes to pad + pad := int64(pf.PageSize) - (offset % int64(pf.PageSize)) + // Write the padding + if _, err := pf.Write(make([]byte, pad)); err != nil { + return 0, err + } + return offset + pad, nil + } + return offset, nil +} + +func (pf *PageFile) FreePage(offset int64) error { + if offset%int64(pf.PageSize) != 0 { + return errors.New("offset is not a multiple of the page size") + } + // find the last nonzero free page index and insert it after that + for i := len(pf.freePageIndexes) - 1; i >= 0; i-- { + if pf.freePageIndexes[i] == 0 { + j := (i + 1) % len(pf.freePageIndexes) + pf.freePageIndexes[j] = offset + + // write the free page index to the last page + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(offset)) + if _, err := pf.ReadWriteSeeker.Seek(int64(j*8), io.SeekStart); err != nil { + return err + } + return nil } - return offset, nil } - return pf.ReadWriteSeeker.Seek(offset, whence) + return errors.New("too many free pages") } diff --git a/pkg/btree/pagefile_test.go b/pkg/btree/pagefile_test.go index b2a010d8..3a6cd78c 100644 --- a/pkg/btree/pagefile_test.go +++ b/pkg/btree/pagefile_test.go @@ -6,57 +6,123 @@ import ( ) func TestPageFile(t *testing.T) { - t.Run("no page size behaves like regular ReadWriteSeeker", func(t *testing.T) { + t.Run("allocates first page", func(t *testing.T) { buf := newSeekableBuffer() - pf := &PageFile{ - ReadWriteSeeker: buf, + pf, err := NewPageFile(buf) + if err != nil { + t.Fatal(err) + } + offset, err := pf.NewPage() + if err != nil { + t.Fatal(err) + } + if offset != pageSizeBytes { + t.Fatalf("expected offset %d, got %d", pageSizeBytes, offset) + } + }) + + t.Run("page size reuses page without allocation", func(t *testing.T) { + buf := newSeekableBuffer() + pf, err := NewPageFile(buf) + if err != nil { + t.Fatal(err) } - if _, err := pf.Seek(0, io.SeekStart); err != nil { + offset1, err := pf.NewPage() + if err != 
nil { + t.Fatal(err) + } + if offset1 != pageSizeBytes { + t.Fatalf("expected offset %d, got %d", pageSizeBytes, offset1) + } + // since no data has been written, this page should be reused. + offset2, err := pf.NewPage() + if err != nil { + t.Fatal(err) + } + if offset2 != pageSizeBytes { + t.Fatalf("expected offset %d, got %d", pageSizeBytes*2, offset2) + } + }) + + t.Run("page size allocates second page", func(t *testing.T) { + buf := newSeekableBuffer() + pf, err := NewPageFile(buf) + if err != nil { t.Fatal(err) } - if _, err := pf.Write([]byte("hello")); err != nil { + offset1, err := pf.NewPage() + if err != nil { t.Fatal(err) } - if _, err := pf.Seek(0, io.SeekStart); err != nil { + if offset1 != pageSizeBytes { + t.Fatalf("expected offset %d, got %d", pageSizeBytes, offset1) + } + // need to write at least one byte to trigger a new page. + if _, err := pf.Write(make([]byte, 1)); err != nil { t.Fatal(err) } - b := make([]byte, 5) - if _, err := pf.Read(b); err != nil { + offset2, err := pf.NewPage() + if err != nil { t.Fatal(err) } - if string(b) != "hello" { - t.Fatalf("expected %q, got %q", "hello", string(b)) + if offset2 != pageSizeBytes*2 { + t.Fatalf("expected offset %d, got %d", pageSizeBytes*2, offset2) } }) - t.Run("page size allocates pages on seek end", func(t *testing.T) { + t.Run("new page seeks to page", func(t *testing.T) { buf := newSeekableBuffer() - pf := &PageFile{ - ReadWriteSeeker: buf, - PageSize: 16, + pf, err := NewPageFile(buf) + if err != nil { + t.Fatal(err) } - if _, err := pf.Seek(0, io.SeekEnd); err != nil { + offset1, err := pf.NewPage() + if err != nil { t.Fatal(err) } - if _, err := pf.Write([]byte("hello")); err != nil { + offset2, err := pf.Seek(0, io.SeekCurrent) + if err != nil { t.Fatal(err) } - if _, err := pf.Seek(0, io.SeekStart); err != nil { + if offset1 != offset2 { + t.Fatalf("expected offset %d, got %d", offset1, offset2) + } + }) + + t.Run("free page reuses page", func(t *testing.T) { + buf := 
newSeekableBuffer() + pf, err := NewPageFile(buf) + if err != nil { + t.Fatal(err) + } + offset1, err := pf.NewPage() + if err != nil { t.Fatal(err) } - b := make([]byte, 5) - if _, err := pf.Read(b); err != nil { + if offset1 != pageSizeBytes { + t.Fatalf("expected offset %d, got %d", pageSizeBytes, offset1) + } + // need to write at least one byte to trigger a new page. + if _, err := pf.Write(make([]byte, 1)); err != nil { t.Fatal(err) } - if string(b) != "hello" { - t.Fatalf("expected %q, got %q", "hello", string(b)) + offset2, err := pf.NewPage() + if err != nil { + t.Fatal(err) + } + if offset2 != pageSizeBytes*2 { + t.Fatalf("expected offset %d, got %d", pageSizeBytes, offset2) + } + + if err := pf.FreePage(offset1); err != nil { + t.Fatal(err) } - n, err := pf.Seek(0, io.SeekEnd) + offset3, err := pf.NewPage() if err != nil { t.Fatal(err) } - if n != 16 { - t.Fatalf("expected %d, got %d", 16, n) + if offset3 != offset1 { + t.Fatalf("expected offset %d, got %d", offset2, offset3) } }) } From 7aa3cff163f1072be4589d8df0d0edc4b262c94b Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sun, 21 Jan 2024 14:45:39 -0500 Subject: [PATCH 3/3] feat: wire up b+ tree with pager (#50) --- pkg/btree/bptree.go | 197 +++------------------------------------ pkg/btree/bptree_test.go | 82 +++++++--------- pkg/btree/multi.go | 24 +++-- pkg/btree/multi_test.go | 52 +++++++---- pkg/btree/pagefile.go | 20 ++-- 5 files changed, 109 insertions(+), 266 deletions(-) diff --git a/pkg/btree/bptree.go b/pkg/btree/bptree.go index f6b037b4..4f26152e 100644 --- a/pkg/btree/bptree.go +++ b/pkg/btree/bptree.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "slices" ) // MetaPage is an abstract interface over the root page of a btree @@ -15,20 +14,15 @@ type MetaPage interface { SetRoot(MemoryPointer) error } -type ReadWriteSeekTruncater interface { - io.ReadWriteSeeker - Truncate(size int64) error -} - type BPTree struct { - tree ReadWriteSeekTruncater + tree ReadWriteSeekPager meta MetaPage 
maxPageSize int } -func NewBPTree(tree ReadWriteSeekTruncater, meta MetaPage, maxPageSize int) *BPTree { - return &BPTree{tree: tree, meta: meta, maxPageSize: maxPageSize} +func NewBPTree(tree ReadWriteSeekPager, meta MetaPage) *BPTree { + return &BPTree{tree: tree, meta: meta} } func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) { @@ -116,7 +110,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error { } if root == nil { // special case, create the root as the first node - offset, err := t.tree.Seek(0, io.SeekEnd) + offset, err := t.tree.NewPage() if err != nil { return err } @@ -151,9 +145,9 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error { for i := 0; i < len(path); i++ { tr := path[i] n := tr.node - if len(n.Keys) > t.maxPageSize { + if len(n.Keys) > t.tree.PageSize() { // split the node - moffset, err := t.tree.Seek(0, io.SeekEnd) + moffset, err := t.tree.NewPage() if err != nil { return err } @@ -184,7 +178,11 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error { n.Pointers = n.Pointers[:mid+1] n.Keys = n.Keys[:mid] } - noffset := moffset + msize + + noffset, err := t.tree.NewPage() + if err != nil { + return err + } nsize, err := n.WriteTo(t.tree) if err != nil { return err @@ -227,7 +225,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error { } } else { // write this node to disk and update the parent - offset, err := t.tree.Seek(0, io.SeekEnd) + offset, err := t.tree.NewPage() if err != nil { return err } @@ -337,174 +335,3 @@ type Entry struct { // } // } // } - -func (t *BPTree) compact() error { - // read all the nodes and compile a list of nodes still referenced, - // then write out the nodes in order, removing unreferenced nodes and updating - // the parent pointers. 
- - _, rootOffset, err := t.root() - if err != nil { - return err - } - - if _, err := t.tree.Seek(0, io.SeekStart); err != nil { - return err - } - - references := []MemoryPointer{rootOffset} - for { - node := &BPTreeNode{} - if _, err := node.ReadFrom(t.tree); err != nil { - if err == io.EOF { - break - } - return err - } - if !node.leaf() { - // all pointers are references - references = append(references, node.Pointers...) - } - } - - // read all the nodes again and write out the referenced nodes - if _, err := t.tree.Seek(0, io.SeekStart); err != nil { - return err - } - - slices.SortFunc(references, func(x, y MemoryPointer) int { - return int(x.Offset - y.Offset) - }) - - referenceMap := make(map[uint64]MemoryPointer) - - offset := 0 - for i, reference := range references { - // skip duplicates - if i > 0 && references[i-1] == reference { - continue - } - // read the referenced node - if _, err := t.tree.Seek(int64(reference.Offset), io.SeekStart); err != nil { - return err - } - node := &BPTreeNode{} - if _, err := node.ReadFrom(t.tree); err != nil { - return err - } - // write the node to the new offset - if _, err := t.tree.Seek(int64(offset), io.SeekStart); err != nil { - return err - } - n, err := node.WriteTo(t.tree) - if err != nil { - return err - } - // update the reference map - referenceMap[reference.Offset] = MemoryPointer{Offset: uint64(offset), Length: uint32(n)} - offset += int(n) - } - - // truncate the file - if err := t.tree.Truncate(int64(offset)); err != nil { - return err - } - - // update the parent pointers - if _, err := t.tree.Seek(0, io.SeekStart); err != nil { - return err - } - for { - offset, err := t.tree.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - node := &BPTreeNode{} - if _, err := node.ReadFrom(t.tree); err != nil { - if err == io.EOF { - break - } - return err - } - if !node.leaf() { - // all pointers are references - for i, p := range node.Pointers { - node.Pointers[i] = referenceMap[p.Offset] - } - } - if 
_, err := t.tree.Seek(offset, io.SeekStart); err != nil { - return err - } - if _, err := node.WriteTo(t.tree); err != nil { - return err - } - } - - // update the meta pointer - return t.meta.SetRoot(referenceMap[rootOffset.Offset]) -} - -func (t *BPTree) String() string { - var buf bytes.Buffer - // get the current seek position - seekPos, err := t.tree.Seek(0, io.SeekCurrent) - if err != nil { - return err.Error() - } - defer func() { - // reset the seek position - if _, err := t.tree.Seek(seekPos, io.SeekStart); err != nil { - panic(err) - } - }() - root, rootOffset, err := t.root() - if err != nil { - return err.Error() - } - if root == nil { - return "empty tree" - } - if _, err := buf.Write([]byte(fmt.Sprintf("root: %d\n", rootOffset))); err != nil { - return err.Error() - } - // seek to 8 - if _, err := t.tree.Seek(0, io.SeekStart); err != nil { - return err.Error() - } - for { - offset, err := t.tree.Seek(0, io.SeekCurrent) - if err != nil { - return err.Error() - } - node := &BPTreeNode{} - if _, err := node.ReadFrom(t.tree); err != nil { - if err == io.EOF { - break - } - return err.Error() - } - if node.leaf() { - if _, err := buf.Write([]byte(fmt.Sprintf("%04d | ", offset))); err != nil { - return err.Error() - } - } else { - if _, err := buf.Write([]byte(fmt.Sprintf("%04d ", offset))); err != nil { - return err.Error() - } - } - for i := 0; i < len(node.Pointers); i++ { - if _, err := buf.Write([]byte(fmt.Sprintf("%04d ", node.Pointers[i]))); err != nil { - return err.Error() - } - if i < len(node.Keys) { - if _, err := buf.Write([]byte(fmt.Sprintf("%02d ", node.Keys[i]))); err != nil { - return err.Error() - } - } - } - if _, err := buf.Write([]byte("\n")); err != nil { - return err.Error() - } - } - return buf.String() -} diff --git a/pkg/btree/bptree_test.go b/pkg/btree/bptree_test.go index 04edbfd3..ef1e6cd2 100644 --- a/pkg/btree/bptree_test.go +++ b/pkg/btree/bptree_test.go @@ -2,7 +2,6 @@ package btree import ( "encoding/binary" - "fmt" 
"testing" ) @@ -22,7 +21,11 @@ func (m *testMetaPage) Root() (MemoryPointer, error) { func TestBPTree(t *testing.T) { t.Run("empty tree", func(t *testing.T) { b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 4096) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree := NewBPTree(p, &testMetaPage{}) // find a key that doesn't exist _, found, err := tree.Find([]byte("hello")) if err != nil { @@ -35,7 +38,11 @@ func TestBPTree(t *testing.T) { t.Run("insert creates a root", func(t *testing.T) { b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 4096) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree := NewBPTree(p, &testMetaPage{}) if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil { t.Fatal(err) } @@ -53,7 +60,11 @@ func TestBPTree(t *testing.T) { t.Run("insert into root", func(t *testing.T) { b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 4096) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree := NewBPTree(p, &testMetaPage{}) if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil { t.Fatal(err) } @@ -82,33 +93,13 @@ func TestBPTree(t *testing.T) { } }) - t.Run("compacting after second root insertion removes old root", func(t *testing.T) { + t.Run("split root", func(t *testing.T) { b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 4096) - if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil { - t.Fatal(err) - } - if err := tree.Insert(ReferencedValue{Value: []byte("world")}, MemoryPointer{Offset: 2}); err != nil { - t.Fatal(err) - } - if err := tree.compact(); err != nil { - t.Fatal(err) - } - v, found, err := tree.Find([]byte("world")) + p, err := NewPageFile(b) if err != nil { t.Fatal(err) } - if !found { - t.Fatal("expected to find key") - } - if v.Offset != 2 { - t.Fatalf("expected value 2, got %d", v) 
- } - }) - - t.Run("split root", func(t *testing.T) { - b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 4096) + tree := NewBPTree(p, &testMetaPage{}) if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil { t.Fatal(err) } @@ -121,9 +112,6 @@ func TestBPTree(t *testing.T) { if err := tree.Insert(ReferencedValue{Value: []byte("cooow")}, MemoryPointer{Offset: 4}); err != nil { t.Fatal(err) } - if err := tree.compact(); err != nil { - t.Fatal(err) - } v1, f1, err := tree.Find([]byte("hello")) if err != nil { t.Fatal(err) @@ -168,47 +156,45 @@ func TestBPTree(t *testing.T) { t.Run("split intermediate", func(t *testing.T) { b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 2) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree := NewBPTree(p, &testMetaPage{}) if err := tree.Insert(ReferencedValue{Value: []byte{0x05}}, MemoryPointer{Offset: 5}); err != nil { t.Fatal(err) } - fmt.Printf("inserted a\n") - fmt.Printf(tree.String()) if err := tree.Insert(ReferencedValue{Value: []byte{0x15}}, MemoryPointer{Offset: 15}); err != nil { t.Fatal(err) } - fmt.Printf("inserted b\n") - fmt.Printf(tree.String()) if err := tree.Insert(ReferencedValue{Value: []byte{0x25}}, MemoryPointer{Offset: 25}); err != nil { t.Fatal(err) } - fmt.Printf("inserted c\n") - fmt.Printf(tree.String()) if err := tree.Insert(ReferencedValue{Value: []byte{0x35}}, MemoryPointer{Offset: 35}); err != nil { t.Fatal(err) } - fmt.Printf("inserted d\n") - fmt.Printf(tree.String()) if err := tree.Insert(ReferencedValue{Value: []byte{0x45}}, MemoryPointer{Offset: 45}); err != nil { t.Fatal(err) } - fmt.Printf("inserted e\n") - fmt.Printf(tree.String()) }) t.Run("insertion test", func(t *testing.T) { b := newSeekableBuffer() - tree := NewBPTree(b, &testMetaPage{}, 512) - for i := 0; i < 10240; i++ { - buf := make([]byte, 4) - binary.BigEndian.PutUint32(buf, uint32(i)) + p, err := NewPageFile(b) + if err != nil { + 
t.Fatal(err) + } + tree := NewBPTree(p, &testMetaPage{}) + for i := 0; i < 16384; i++ { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(i)) if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil { t.Fatal(err) } } - for i := 0; i < 10240; i++ { - buf := make([]byte, 4) - binary.BigEndian.PutUint32(buf, uint32(i)) + for i := 0; i < 16384; i++ { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(i)) v, found, err := tree.Find(buf) if err != nil { t.Fatal(err) diff --git a/pkg/btree/multi.go b/pkg/btree/multi.go index 6a646c98..90024c65 100644 --- a/pkg/btree/multi.go +++ b/pkg/btree/multi.go @@ -7,12 +7,12 @@ import ( ) type LinkedMetaPage struct { - rws io.ReadWriteSeeker + rws ReadWriteSeekPager offset uint64 } func (m *LinkedMetaPage) Root() (MemoryPointer, error) { - if _, err := m.rws.Seek(int64(m.offset), 0); err != nil { + if _, err := m.rws.Seek(int64(m.offset), io.SeekStart); err != nil { return MemoryPointer{}, err } var mp MemoryPointer @@ -20,14 +20,14 @@ func (m *LinkedMetaPage) Root() (MemoryPointer, error) { } func (m *LinkedMetaPage) SetRoot(mp MemoryPointer) error { - if _, err := m.rws.Seek(int64(m.offset), 0); err != nil { + if _, err := m.rws.Seek(int64(m.offset), io.SeekStart); err != nil { return err } return binary.Write(m.rws, binary.LittleEndian, mp) } func (m *LinkedMetaPage) Metadata() (MemoryPointer, error) { - if _, err := m.rws.Seek(int64(m.offset)+12, 0); err != nil { + if _, err := m.rws.Seek(int64(m.offset)+12, io.SeekStart); err != nil { return MemoryPointer{}, err } var mp MemoryPointer @@ -35,14 +35,14 @@ func (m *LinkedMetaPage) Metadata() (MemoryPointer, error) { } func (m *LinkedMetaPage) SetMetadata(mp MemoryPointer) error { - if _, err := m.rws.Seek(int64(m.offset)+12, 0); err != nil { + if _, err := m.rws.Seek(int64(m.offset)+12, io.SeekStart); err != nil { return err } return binary.Write(m.rws, binary.LittleEndian, mp) } func (m *LinkedMetaPage) 
Next() (*LinkedMetaPage, error) { - if _, err := m.rws.Seek(int64(m.offset)+24, 0); err != nil { + if _, err := m.rws.Seek(int64(m.offset)+24, io.SeekStart); err != nil { return nil, err } var next MemoryPointer @@ -64,7 +64,7 @@ func (m *LinkedMetaPage) AddNext() (*LinkedMetaPage, error) { if curr != nil { return nil, errors.New("next pointer is not zero") } - offset, err := m.rws.Seek(0, io.SeekEnd) + offset, err := m.rws.NewPage() if err != nil { return nil, err } @@ -73,7 +73,7 @@ func (m *LinkedMetaPage) AddNext() (*LinkedMetaPage, error) { return nil, err } // save the next pointer - if _, err := m.rws.Seek(int64(m.offset)+24, 0); err != nil { + if _, err := m.rws.Seek(int64(m.offset)+24, io.SeekStart); err != nil { return nil, err } if err := binary.Write(m.rws, binary.LittleEndian, next.offset); err != nil { @@ -105,6 +105,10 @@ func (m *LinkedMetaPage) Reset() error { return nil } -func NewMultiBPTree(t ReadWriteSeekTruncater) *LinkedMetaPage { - return &LinkedMetaPage{rws: t, offset: 0} +func NewMultiBPTree(t ReadWriteSeekPager) (*LinkedMetaPage, error) { + offset, err := t.NewPage() + if err != nil { + return nil, err + } + return &LinkedMetaPage{rws: t, offset: uint64(offset)}, nil } diff --git a/pkg/btree/multi_test.go b/pkg/btree/multi_test.go index 82641879..cca2228b 100644 --- a/pkg/btree/multi_test.go +++ b/pkg/btree/multi_test.go @@ -8,7 +8,14 @@ import ( func TestMultiBPTree(t *testing.T) { t.Run("empty tree", func(t *testing.T) { b := newSeekableBuffer() - tree := NewMultiBPTree(b) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree, err := NewMultiBPTree(p) + if err != nil { + t.Fatal(err) + } exists, err := tree.Exists() if err != nil { t.Fatal(err) @@ -20,7 +27,14 @@ func TestMultiBPTree(t *testing.T) { t.Run("reset tree", func(t *testing.T) { b := newSeekableBuffer() - tree := NewMultiBPTree(b) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree, err := NewMultiBPTree(p) + if err != nil { + t.Fatal(err) 
+ } if err := tree.Reset(); err != nil { t.Fatal(err) } @@ -32,9 +46,6 @@ func TestMultiBPTree(t *testing.T) { t.Fatal("expected found") } mp := tree.MemoryPointer() - if mp.Offset != 0 { - t.Fatalf("expected offset 0, got %d", mp.Offset) - } if mp.Length != 36 { t.Fatalf("expected length 36, got %d", mp.Length) } @@ -42,7 +53,14 @@ func TestMultiBPTree(t *testing.T) { t.Run("insert a second page", func(t *testing.T) { b := newSeekableBuffer() - tree := NewMultiBPTree(b) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree, err := NewMultiBPTree(p) + if err != nil { + t.Fatal(err) + } if err := tree.Reset(); err != nil { t.Fatal(err) } @@ -50,9 +68,6 @@ func TestMultiBPTree(t *testing.T) { if err != nil { t.Fatal(err) } - if next1.MemoryPointer().Offset != 36 { - t.Fatalf("expected offset 36, got %d", next1) - } if next1.MemoryPointer().Length != 36 { t.Fatalf("expected length 36, got %d", next1) } @@ -60,13 +75,14 @@ func TestMultiBPTree(t *testing.T) { if err != nil { t.Fatal(err) } - if next2.MemoryPointer().Offset != 72 { - t.Fatalf("expected offset 72, got %d", next2) - } if next2.MemoryPointer().Length != 36 { t.Fatalf("expected length 36, got %d", next2) } + if next1.MemoryPointer().Offset == next2.MemoryPointer().Offset { + t.Fatalf("expected different offsets, got %d", next1.MemoryPointer().Offset) + } + // check the first page m1, err := tree.Next() if err != nil { @@ -79,7 +95,14 @@ func TestMultiBPTree(t *testing.T) { t.Run("duplicate next pointer", func(t *testing.T) { b := newSeekableBuffer() - tree := NewMultiBPTree(b) + p, err := NewPageFile(b) + if err != nil { + t.Fatal(err) + } + tree, err := NewMultiBPTree(p) + if err != nil { + t.Fatal(err) + } if err := tree.Reset(); err != nil { t.Fatal(err) } @@ -87,9 +110,6 @@ func TestMultiBPTree(t *testing.T) { if err != nil { t.Fatal(err) } - if next1.MemoryPointer().Offset != 36 { - t.Fatalf("expected offset 36, got %d", next1) - } if next1.MemoryPointer().Length != 36 { 
t.Fatalf("expected length 36, got %d", next1) } diff --git a/pkg/btree/pagefile.go b/pkg/btree/pagefile.go index 271b4469..a1ff316b 100644 --- a/pkg/btree/pagefile.go +++ b/pkg/btree/pagefile.go @@ -7,16 +7,18 @@ import ( "log" ) -type ReadWritePager interface { - io.ReadWriter +type ReadWriteSeekPager interface { + io.ReadWriteSeeker NewPage() (int64, error) FreePage(int64) error + + PageSize() int } type PageFile struct { io.ReadWriteSeeker - PageSize int + pageSize int // local cache of free pages to avoid reading from disk too often. freePageIndexes [512]int64 @@ -38,7 +40,7 @@ func NewPageFile(rws io.ReadWriteSeeker) (*PageFile, error) { } pf := &PageFile{ ReadWriteSeeker: rws, - PageSize: pageSizeBytes, + pageSize: pageSizeBytes, } if err == io.EOF { // allocate one page for the free page indexes @@ -81,9 +83,9 @@ func (pf *PageFile) NewPage() (int64, error) { } // if the offset is not a multiple of the page size, we need to pad the file // with zeros to the next page boundary. - if pf.PageSize > 0 && offset%int64(pf.PageSize) != 0 { + if pf.pageSize > 0 && offset%int64(pf.pageSize) != 0 { // Calculate the number of bytes to pad - pad := int64(pf.PageSize) - (offset % int64(pf.PageSize)) + pad := int64(pf.pageSize) - (offset % int64(pf.pageSize)) // Write the padding if _, err := pf.Write(make([]byte, pad)); err != nil { return 0, err @@ -94,7 +96,7 @@ func (pf *PageFile) NewPage() (int64, error) { } func (pf *PageFile) FreePage(offset int64) error { - if offset%int64(pf.PageSize) != 0 { + if offset%int64(pf.pageSize) != 0 { return errors.New("offset is not a multiple of the page size") } // find the last nonzero free page index and insert it after that @@ -114,3 +116,7 @@ func (pf *PageFile) FreePage(offset int64) error { } return errors.New("too many free pages") } + +func (pf *PageFile) PageSize() int { + return pf.pageSize +}