Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

private changes in bann #517

Draft
wants to merge 8 commits into
base: bann-base
Choose a base branch
from
Draft
6 changes: 5 additions & 1 deletion include/abstract_data_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include "types.h"
#include "windows_customizations.h"
#include "distance.h"
#include "aligned_file_reader.h"


namespace diskann
{
Expand All @@ -21,13 +23,15 @@ template <typename data_t> class AbstractDataStore
virtual ~AbstractDataStore() = default;

// Returns the number of points loaded.
virtual location_t load(const std::string &filename) = 0;
virtual location_t load(const std::string &filename, size_t offset) = 0;
virtual location_t load(AlignedFileReader &reader, size_t offset) = 0;

// Why does store take num_pts? Since store only has capacity, but we allow
// resizing we can end up in a situation where the store has spare capacity.
// To optimize disk utilization, we pass the number of points that are "true"
// points, so that the store can discard the empty locations before saving.
virtual size_t save(const std::string &filename, const location_t num_pts) = 0;
virtual size_t save(std::ofstream &writer, const location_t num_pts, size_t offset) = 0;

DISKANN_DLLEXPORT virtual location_t capacity() const;

Expand Down
15 changes: 13 additions & 2 deletions include/abstract_graph_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <vector>
#include "types.h"

class AlignedFileReader;

namespace diskann
{

Expand All @@ -21,11 +23,20 @@ class AbstractGraphStore
virtual ~AbstractGraphStore() = default;

// returns tuple of <nodes_read, start, num_frozen_points>
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &index_path_prefix,
const size_t num_points) = 0;
#ifdef EXEC_ENV_OLS
virtual std::tuple<uint32_t, uint32_t, size_t> load(AlignedFileReader &reader, const size_t num_points,
size_t offset) = 0;
#else
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &index_path_prefix, const size_t num_points,
size_t offset) = 0;
#endif

virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_fz_points,
const uint32_t start) = 0;

virtual int store(std::ofstream &writer, const size_t num_points, const size_t num_fz_points, const uint32_t start,
size_t offset) = 0;

// not synchronised, user should use lock when necessary.
virtual const std::vector<location_t> &get_neighbours(const location_t i) const = 0;
virtual void add_neighbour(const location_t i, location_t neighbour_id) = 0;
Expand Down
5 changes: 5 additions & 0 deletions include/aligned_file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,9 @@ class AlignedFileReader
// process batch of aligned requests in parallel
// NOTE :: blocking call
virtual void read(std::vector<AlignedRead> &read_reqs, IOContext &ctx, bool async = false) = 0;

#ifdef USE_BING_INFRA
// wait for completion of one request in a batch of requests
virtual void wait(IOContext &ctx, int &completedIndex) = 0;
#endif
};
2 changes: 1 addition & 1 deletion include/defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const uint32_t NUM_FROZEN_POINTS_STATIC = 0;
const uint32_t NUM_FROZEN_POINTS_DYNAMIC = 1;

// In-mem index related limits
const float GRAPH_SLACK_FACTOR = 1.3;
const float GRAPH_SLACK_FACTOR = 1.3f;

// SSD Index related limits
const uint64_t MAX_GRAPH_DEGREE = 512;
Expand Down
1 change: 1 addition & 0 deletions include/distance.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include "windows_customizations.h"
#include <cstdint>
#include <cstring>

namespace diskann
Expand Down
10 changes: 6 additions & 4 deletions include/in_mem_data_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ template <typename data_t> class InMemDataStore : public AbstractDataStore<data_
InMemDataStore(const location_t capacity, const size_t dim, std::unique_ptr<Distance<data_t>> distance_fn);
virtual ~InMemDataStore();

virtual location_t load(const std::string &filename) override;
virtual size_t save(const std::string &filename, const location_t num_points) override;
virtual location_t load(const std::string &filename, size_t offset = 0) override;
virtual location_t load(AlignedFileReader &reader, size_t offset = 0) override;
virtual size_t save(const std::string &filename, const location_t num_pts) override;
virtual size_t save(std::ofstream &writer, const location_t num_pts, size_t offset) override;

virtual size_t get_aligned_dim() const override;

Expand Down Expand Up @@ -59,9 +61,9 @@ template <typename data_t> class InMemDataStore : public AbstractDataStore<data_
virtual location_t expand(const location_t new_size) override;
virtual location_t shrink(const location_t new_size) override;

virtual location_t load_impl(const std::string &filename);
virtual location_t load_impl(const std::string &filename, size_t offset);
#ifdef EXEC_ENV_OLS
virtual location_t load_impl(AlignedFileReader &reader);
virtual location_t load_impl(AlignedFileReader &reader, size_t offset);
#endif

private:
Expand Down
23 changes: 16 additions & 7 deletions include/in_mem_graph_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,17 @@ class InMemGraphStore : public AbstractGraphStore
InMemGraphStore(const size_t total_pts, const size_t reserve_graph_degree);

// returns tuple of <nodes_read, start, num_frozen_points>
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &index_path_prefix,
const size_t num_points) override;
#ifdef EXEC_ENV_OLS
virtual std::tuple<uint32_t, uint32_t, size_t> load(AlignedFileReader &reader, const size_t num_points,
size_t offset) override;
#else
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &filename, size_t expected_num_points,
size_t offset);
#endif
virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_frozen_points,
const uint32_t start) override;

virtual int store(std::ofstream &writer, const size_t num_points, const size_t num_fz_points, const uint32_t start,
size_t offset) override;
virtual const std::vector<location_t> &get_neighbours(const location_t i) const override;
virtual void add_neighbour(const location_t i, location_t neighbour_id) override;
virtual void clear_neighbours(const location_t i) override;
Expand All @@ -33,13 +39,16 @@ class InMemGraphStore : public AbstractGraphStore
virtual uint32_t get_max_observed_degree() override;

protected:
virtual std::tuple<uint32_t, uint32_t, size_t> load_impl(const std::string &filename, size_t expected_num_points);
#ifdef EXEC_ENV_OLS
virtual std::tuple<uint32_t, uint32_t, size_t> load_impl(AlignedFileReader &reader, size_t expected_num_points);
virtual std::tuple<uint32_t, uint32_t, size_t> load_impl(AlignedFileReader &reader, size_t expected_num_points,
size_t offset);
#else
virtual std::tuple<uint32_t, uint32_t, size_t> load_impl(const std::string &filename, size_t expected_num_points,
size_t offset);
#endif

int save_graph(const std::string &index_path_prefix, const size_t active_points, const size_t num_frozen_points,
const uint32_t start);
int save_graph(std::ofstream &writer, const size_t active_points, const size_t num_frozen_points,
const uint32_t start, size_t offset);

private:
size_t _max_range_of_graph = 0;
Expand Down
38 changes: 27 additions & 11 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@

namespace diskann
{
// Metadata for the single-file ("save_as_one_file") index format, version 1.
// Holds one 64-bit offset per index section (data, delete list, tags, graph).
// NOTE(review): presumably these are byte offsets of each section within the
// combined file, and the field order mirrors the persisted layout -- confirm
// against the save/load implementation before reordering or resizing fields.
struct SaveLoadMetaDataV1
{
// Offset of the vector data section.
uint64_t data_offset;
// Offset of the delete-list section.
uint64_t delete_list_offset;
// Offset of the tags section.
uint64_t tags_offset;
// Offset of the graph section.
uint64_t graph_offset;

// Defined out of line. NOTE(review): presumably zero-initializes all
// offsets -- confirm in the implementation file.
SaveLoadMetaDataV1();
};

inline double estimate_ram_usage(size_t size, uint32_t dim, uint32_t datasize, uint32_t degree)
{
Expand Down Expand Up @@ -57,7 +67,9 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
const size_t num_frozen_pts = 0, const bool dynamic_index = false,
const bool enable_tags = false, const bool concurrent_consolidate = false,
const bool pq_dist_build = false, const size_t num_pq_chunks = 0,
const bool use_opq = false, const bool filtered_index = false);
const bool use_opq = false, const bool filtered_index = false,
bool save_as_one_file = false, uint64_t save_as_one_file_version = 1,
bool load_from_one_file = false, uint64_t load_from_one_file_version = 1);

DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr<AbstractDataStore<T>> data_store,
std::unique_ptr<AbstractGraphStore> graph_store);
Expand All @@ -80,6 +92,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// get some private variables
DISKANN_DLLEXPORT size_t get_num_points();
DISKANN_DLLEXPORT size_t get_max_points();
DISKANN_DLLEXPORT size_t get_num_deleted_points();

DISKANN_DLLEXPORT bool detect_common_filters(uint32_t point_id, bool search_invocation,
const std::vector<LabelT> &incoming_labels);
Expand Down Expand Up @@ -294,7 +307,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// Renumber nodes, update tag and location maps and compact the
// graph, mode = _consolidated_order in case of lazy deletion and
// _compacted_order in case of eager deletion
DISKANN_DLLEXPORT void compact_data();
DISKANN_DLLEXPORT void compact_data(bool forced = false);
DISKANN_DLLEXPORT void compact_frozen_point();

// Remove deleted nodes from adjacency list of node loc
Expand All @@ -313,15 +326,15 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
DISKANN_DLLEXPORT size_t save_tags(std::string filename);
DISKANN_DLLEXPORT size_t save_delete_list(const std::string &filename);
#ifdef EXEC_ENV_OLS
DISKANN_DLLEXPORT size_t load_graph(AlignedFileReader &reader, size_t expected_num_points);
DISKANN_DLLEXPORT size_t load_data(AlignedFileReader &reader);
DISKANN_DLLEXPORT size_t load_tags(AlignedFileReader &reader);
DISKANN_DLLEXPORT size_t load_delete_set(AlignedFileReader &reader);
DISKANN_DLLEXPORT size_t load_graph(AlignedFileReader &reader, size_t expected_num_points, size_t offset = 0);
DISKANN_DLLEXPORT size_t load_data(AlignedFileReader &reader, size_t offset = 0);
DISKANN_DLLEXPORT size_t load_tags(AlignedFileReader &reader, size_t offset = 0);
DISKANN_DLLEXPORT size_t load_delete_set(AlignedFileReader &reader, size_t offset = 0);
#else
DISKANN_DLLEXPORT size_t load_graph(const std::string filename, size_t expected_num_points);
DISKANN_DLLEXPORT size_t load_data(std::string filename0);
DISKANN_DLLEXPORT size_t load_tags(const std::string tag_file_name);
DISKANN_DLLEXPORT size_t load_delete_set(const std::string &filename);
DISKANN_DLLEXPORT size_t load_graph(const std::string filename, size_t expected_num_points, size_t offset = 0);
DISKANN_DLLEXPORT size_t load_data(std::string filename, size_t offset = 0);
DISKANN_DLLEXPORT size_t load_tags(const std::string &filename, size_t offset = 0);
DISKANN_DLLEXPORT size_t load_delete_set(const std::string &filename, size_t offset = 0);
#endif

private:
Expand Down Expand Up @@ -360,7 +373,10 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas

bool _has_built = false;
bool _saturate_graph = false;
bool _save_as_one_file = false; // plan to support in next version
// Single-file save/load settings.
// NOTE(review): none of these four members has an in-class initializer in
// this view; confirm that every Index constructor sets all of them.
bool _save_as_one_file; // Whether to save the index to a single file; filtered index support is planned for the next version.
uint64_t _save_as_one_file_version; // Format version used when saving the index to a single file.
bool _load_from_one_file; // Whether to load the index from a single file.
uint64_t _load_from_one_file_version; // Format version used when loading the index from a single file.
bool _dynamic_index = false;
bool _enable_tags = false;
bool _normalize_vecs = false; // Using normalized L2 for cosine.
Expand Down
42 changes: 39 additions & 3 deletions include/index_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ struct IndexConfig
bool concurrent_consolidate;
bool use_opq;
bool filtered_index;
bool save_as_one_file;
uint64_t save_as_one_file_version;
bool load_from_one_file;
uint64_t load_from_one_file_version;

size_t num_pq_chunks;
size_t num_frozen_pts;
Expand All @@ -45,12 +49,15 @@ struct IndexConfig
IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension,
size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags,
bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index,
std::string &data_type, const std::string &tag_type, const std::string &label_type,
std::shared_ptr<IndexWriteParameters> index_write_params,
bool save_as_one_file, uint64_t save_as_one_file_version, bool load_from_one_file,
uint64_t load_from_one_file_version, std::string &data_type, const std::string &tag_type,
const std::string &label_type, std::shared_ptr<IndexWriteParameters> index_write_params,
std::shared_ptr<IndexSearchParams> index_search_params)
: data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension),
max_points(max_points), dynamic_index(dynamic_index), enable_tags(enable_tags), pq_dist_build(pq_dist_build),
concurrent_consolidate(concurrent_consolidate), use_opq(use_opq), filtered_index(filtered_index),
save_as_one_file(save_as_one_file), save_as_one_file_version(save_as_one_file_version),
load_from_one_file(load_from_one_file), load_from_one_file_version(load_from_one_file_version),
num_pq_chunks(num_pq_chunks), num_frozen_pts(num_frozen_points), label_type(label_type), tag_type(tag_type),
data_type(data_type), index_write_params(index_write_params), index_search_params(index_search_params)
{
Expand Down Expand Up @@ -194,6 +201,30 @@ class IndexConfigBuilder
return *this;
}

// Records whether the index should be saved as a single file.
// Returns this builder so calls can be chained.
IndexConfigBuilder &with_save_as_single_file(bool save_as_one_file)
{
    _save_as_one_file = save_as_one_file;
    return *this;
}

// Records the format version to use when saving the index as a single file.
// Returns this builder so calls can be chained.
IndexConfigBuilder &with_save_as_single_file_version(uint64_t save_as_one_file_version)
{
    _save_as_one_file_version = save_as_one_file_version;
    return *this;
}

// Records whether the index should be loaded from a single file.
// Returns this builder so calls can be chained.
IndexConfigBuilder &with_load_from_single_file(bool load_from_one_file)
{
    _load_from_one_file = load_from_one_file;
    return *this;
}

// Records the format version to expect when loading the index from a single file.
// Returns this builder so calls can be chained.
IndexConfigBuilder &with_load_from_single_file_version(uint64_t load_from_one_file_version)
{
    _load_from_one_file_version = load_from_one_file_version;
    return *this;
}

IndexConfig build()
{
if (_data_type == "" || _data_type.empty())
Expand All @@ -219,7 +250,8 @@ class IndexConfigBuilder

return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks,
_num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate,
_use_opq, _filtered_index, _data_type, _tag_type, _label_type, _index_write_params,
_use_opq, _filtered_index, _save_as_one_file, _save_as_one_file_version, _load_from_one_file,
_load_from_one_file_version, _data_type, _tag_type, _label_type, _index_write_params,
_index_search_params);
}

Expand All @@ -240,6 +272,10 @@ class IndexConfigBuilder
bool _concurrent_consolidate = false;
bool _use_opq = false;
bool _filtered_index{defaults::HAS_LABELS};
// Single-file save/load settings. Each gets an explicit default, like the
// other builder members, because build() forwards all four to IndexConfig
// even when the corresponding with_*() setter was never called; without
// defaults that would read indeterminate values. The defaults match the
// Index constructor defaults (disabled, version 1).
bool _save_as_one_file = false;
uint64_t _save_as_one_file_version = 1;
bool _load_from_one_file = false;
uint64_t _load_from_one_file_version = 1;

size_t _num_pq_chunks = 0;
size_t _num_frozen_pts{defaults::NUM_FROZEN_POINTS_STATIC};
Expand Down
16 changes: 8 additions & 8 deletions include/parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,7 @@ class IndexWriteParameters

{
public:
const uint32_t search_list_size; // L
const uint32_t max_degree; // R
const bool saturate_graph;
const uint32_t max_occlusion_size; // C
const float alpha;
const uint32_t num_threads;
const uint32_t filter_list_size; // Lf

private:
IndexWriteParameters(const uint32_t search_list_size, const uint32_t max_degree, const bool saturate_graph,
const uint32_t max_occlusion_size, const float alpha, const uint32_t num_threads,
const uint32_t filter_list_size)
Expand All @@ -34,6 +26,14 @@ class IndexWriteParameters
{
}

const uint32_t search_list_size; // L
const uint32_t max_degree; // R
const bool saturate_graph;
const uint32_t max_occlusion_size; // C
const float alpha;
const uint32_t num_threads;
const uint32_t filter_list_size; // Lf

friend class IndexWriteParametersBuilder;
};

Expand Down
Loading
Loading