From fd237cf0d84e7733ff15c60cbfa08061ffead63c Mon Sep 17 00:00:00 2001 From: "REDMOND\\ninchen" Date: Fri, 10 Nov 2023 15:54:39 -0800 Subject: [PATCH 1/6] Prototype single file save/load --- include/abstract_data_store.h | 3 +- include/abstract_graph_store.h | 5 +- include/distance.h | 1 + include/in_mem_data_store.h | 9 +- include/in_mem_graph_store.h | 13 +- include/index.h | 30 +++-- include/index_config.h | 25 +++- include/utils.h | 25 +++- src/distance.cpp | 3 + src/in_mem_data_store.cpp | 22 +-- src/in_mem_graph_store.cpp | 60 +++++---- src/index.cpp | 238 +++++++++++++++++++++++++++++---- 12 files changed, 341 insertions(+), 93 deletions(-) diff --git a/include/abstract_data_store.h b/include/abstract_data_store.h index d858c8eef..60eeb6c03 100644 --- a/include/abstract_data_store.h +++ b/include/abstract_data_store.h @@ -21,13 +21,14 @@ template class AbstractDataStore virtual ~AbstractDataStore() = default; // Return number of points returned - virtual location_t load(const std::string &filename) = 0; + virtual location_t load(const std::string &filename, size_t offset) = 0; // Why does store take num_pts? Since store only has capacity, but we allow // resizing we can end up in a situation where the store has spare capacity. // To optimize disk utilization, we pass the number of points that are "true" // points, so that the store can discard the empty locations before saving. 
virtual size_t save(const std::string &filename, const location_t num_pts) = 0; + virtual size_t save(std::ofstream &writer, const location_t num_pts, size_t offset) = 0; DISKANN_DLLEXPORT virtual location_t capacity() const; diff --git a/include/abstract_graph_store.h b/include/abstract_graph_store.h index 4d6906ca4..110fbc11d 100644 --- a/include/abstract_graph_store.h +++ b/include/abstract_graph_store.h @@ -22,10 +22,13 @@ class AbstractGraphStore // returns tuple of virtual std::tuple load(const std::string &index_path_prefix, - const size_t num_points) = 0; + const size_t num_points, size_t offset) = 0; virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_fz_points, const uint32_t start) = 0; + virtual int store(std::ofstream& writer, const size_t num_points, const size_t num_fz_points, + const uint32_t start, size_t offset) = 0; + // not synchronised, user should use lock when necvessary. virtual const std::vector &get_neighbours(const location_t i) const = 0; virtual void add_neighbour(const location_t i, location_t neighbour_id) = 0; diff --git a/include/distance.h b/include/distance.h index 8b20e586b..065b38231 100644 --- a/include/distance.h +++ b/include/distance.h @@ -1,5 +1,6 @@ #pragma once #include "windows_customizations.h" +#include #include namespace diskann diff --git a/include/in_mem_data_store.h b/include/in_mem_data_store.h index 9b6968b03..b610bb2dd 100644 --- a/include/in_mem_data_store.h +++ b/include/in_mem_data_store.h @@ -24,8 +24,9 @@ template class InMemDataStore : public AbstractDataStore> distance_fn); virtual ~InMemDataStore(); - virtual location_t load(const std::string &filename) override; - virtual size_t save(const std::string &filename, const location_t num_points) override; + virtual location_t load(const std::string &filename, size_t offset = 0) override; + virtual size_t save(const std::string &filename, const location_t num_pts) override; + virtual size_t save(std::ofstream 
&writer, const location_t num_pts, size_t offset) override; virtual size_t get_aligned_dim() const override; @@ -59,9 +60,9 @@ template class InMemDataStore : public AbstractDataStore virtual std::tuple load(const std::string &index_path_prefix, - const size_t num_points) override; + const size_t num_points, size_t offset) override; virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_frozen_points, const uint32_t start) override; - + virtual int store(std::ofstream &writer, const size_t num_points, const size_t num_fz_points, const uint32_t start, + size_t offset) override; virtual const std::vector &get_neighbours(const location_t i) const override; virtual void add_neighbour(const location_t i, location_t neighbour_id) override; virtual void clear_neighbours(const location_t i) override; @@ -33,13 +34,13 @@ class InMemGraphStore : public AbstractGraphStore virtual uint32_t get_max_observed_degree() override; protected: - virtual std::tuple load_impl(const std::string &filename, size_t expected_num_points); + virtual std::tuple load_impl(const std::string &filename, size_t expected_num_points, size_t offset); #ifdef EXEC_ENV_OLS - virtual std::tuple load_impl(AlignedFileReader &reader, size_t expected_num_points); + virtual std::tuple load_impl(AlignedFileReader &reader, size_t expected_num_points, size_t offset); #endif - int save_graph(const std::string &index_path_prefix, const size_t active_points, const size_t num_frozen_points, - const uint32_t start); + int save_graph(std::ofstream &writer, const size_t active_points, const size_t num_frozen_points, + const uint32_t start, size_t offset); private: size_t _max_range_of_graph = 0; diff --git a/include/index.h b/include/index.h index e7966461c..e5af15599 100644 --- a/include/index.h +++ b/include/index.h @@ -28,6 +28,14 @@ namespace diskann { +// This struct is used for storing metadata for save_as_one_file version 1. 
+struct SaveLoadMetaDataV1 +{ + uint64_t data_offset; + uint64_t delete_list_offset; + uint64_t tags_offset; + uint64_t graph_offset; +}; inline double estimate_ram_usage(size_t size, uint32_t dim, uint32_t datasize, uint32_t degree) { @@ -57,7 +65,8 @@ template clas const size_t num_frozen_pts = 0, const bool dynamic_index = false, const bool enable_tags = false, const bool concurrent_consolidate = false, const bool pq_dist_build = false, const size_t num_pq_chunks = 0, - const bool use_opq = false, const bool filtered_index = false); + const bool use_opq = false, const bool filtered_index = false, + bool save_as_one_file = false, uint64_t save_as_one_file_version = 1); DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store, std::unique_ptr graph_store); @@ -313,15 +322,15 @@ template clas DISKANN_DLLEXPORT size_t save_tags(std::string filename); DISKANN_DLLEXPORT size_t save_delete_list(const std::string &filename); #ifdef EXEC_ENV_OLS - DISKANN_DLLEXPORT size_t load_graph(AlignedFileReader &reader, size_t expected_num_points); - DISKANN_DLLEXPORT size_t load_data(AlignedFileReader &reader); - DISKANN_DLLEXPORT size_t load_tags(AlignedFileReader &reader); - DISKANN_DLLEXPORT size_t load_delete_set(AlignedFileReader &reader); + DISKANN_DLLEXPORT size_t load_graph(AlignedFileReader &reader, size_t expected_num_points, size_t offset = 0); + DISKANN_DLLEXPORT size_t load_data(AlignedFileReader &reader, size_t offset = 0); + DISKANN_DLLEXPORT size_t load_tags(AlignedFileReader &reader, size_t offset = 0); + DISKANN_DLLEXPORT size_t load_delete_set(AlignedFileReader &reader, size_t offset = 0); #else - DISKANN_DLLEXPORT size_t load_graph(const std::string filename, size_t expected_num_points); - DISKANN_DLLEXPORT size_t load_data(std::string filename0); - DISKANN_DLLEXPORT size_t load_tags(const std::string tag_file_name); - DISKANN_DLLEXPORT size_t load_delete_set(const std::string &filename); + DISKANN_DLLEXPORT size_t load_graph(const 
std::string filename, size_t expected_num_points, size_t offset = 0); + DISKANN_DLLEXPORT size_t load_data(std::string filename, size_t offset = 0); + DISKANN_DLLEXPORT size_t load_tags(const std::string &filename, size_t offset = 0); + DISKANN_DLLEXPORT size_t load_delete_set(const std::string &filename, size_t offset = 0); #endif private: @@ -360,7 +369,8 @@ template clas bool _has_built = false; bool _saturate_graph = false; - bool _save_as_one_file = false; // plan to support in next version + bool _save_as_one_file; // plan to support filtered index in next version. + uint64_t _save_as_one_file_version; // Version used for save index as single file. bool _dynamic_index = false; bool _enable_tags = false; bool _normalize_vecs = false; // Using normalied L2 for cosine. diff --git a/include/index_config.h b/include/index_config.h index 452498b01..8b8f7e1f8 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -28,6 +28,8 @@ struct IndexConfig bool concurrent_consolidate; bool use_opq; bool filtered_index; + bool save_as_one_file; + uint64_t save_as_one_file_version; size_t num_pq_chunks; size_t num_frozen_pts; @@ -44,13 +46,14 @@ struct IndexConfig private: IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension, size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags, - bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index, - std::string &data_type, const std::string &tag_type, const std::string &label_type, + bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index, bool save_as_one_file, + uint64_t save_as_one_file_version, std::string &data_type, const std::string &tag_type, const std::string &label_type, std::shared_ptr index_write_params, std::shared_ptr index_search_params) : data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension), 
max_points(max_points), dynamic_index(dynamic_index), enable_tags(enable_tags), pq_dist_build(pq_dist_build), concurrent_consolidate(concurrent_consolidate), use_opq(use_opq), filtered_index(filtered_index), + save_as_one_file(save_as_one_file), save_as_one_file_version(save_as_one_file_version), num_pq_chunks(num_pq_chunks), num_frozen_pts(num_frozen_points), label_type(label_type), tag_type(tag_type), data_type(data_type), index_write_params(index_write_params), index_search_params(index_search_params) { @@ -194,6 +197,18 @@ class IndexConfigBuilder return *this; } + IndexConfigBuilder &with_save_as_single_file(bool save_as_one_file) + { + this->_save_as_one_file = save_as_one_file; + return *this; + } + + IndexConfigBuilder &with_save_as_single_file_version(uint64_t save_as_one_file_version) + { + this->_save_as_one_file_version = save_as_one_file_version; + return *this; + } + IndexConfig build() { if (_data_type == "" || _data_type.empty()) @@ -219,8 +234,8 @@ class IndexConfigBuilder return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, _num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate, - _use_opq, _filtered_index, _data_type, _tag_type, _label_type, _index_write_params, - _index_search_params); + _use_opq, _filtered_index, _save_as_one_file, _save_as_one_file_version, _data_type, _tag_type, + _label_type, _index_write_params, _index_search_params); } IndexConfigBuilder(const IndexConfigBuilder &) = delete; @@ -240,6 +255,8 @@ class IndexConfigBuilder bool _concurrent_consolidate = false; bool _use_opq = false; bool _filtered_index{defaults::HAS_LABELS}; + bool _save_as_one_file; + uint64_t _save_as_one_file_version; size_t _num_pq_chunks = 0; size_t _num_frozen_pts{defaults::NUM_FROZEN_POINTS_STATIC}; diff --git a/include/utils.h b/include/utils.h index bb03d13f1..4fd0dd3c1 100644 --- a/include/utils.h +++ b/include/utils.h @@ -719,8 +719,16 @@ inline size_t save_bin(const 
std::string &filename, T *data, size_t npts, size_t { std::ofstream writer; open_file_to_write(writer, filename); + diskann::cout << "Writing bin file: " << filename.c_str() << std::endl; + size_t bytes_written = save_bin(writer, data, npts, ndims, offset); + writer.close(); + diskann::cout << "Close file " << filename << "." << std::endl; + return bytes_written; +} - diskann::cout << "Writing bin: " << filename.c_str() << std::endl; +template +inline size_t save_bin(std::ofstream &writer, T *data, size_t npts, size_t ndims, size_t offset) +{ writer.seekp(offset, writer.beg); int npts_i32 = (int)npts, ndims_i32 = (int)ndims; size_t bytes_written = npts * ndims * sizeof(T) + 2 * sizeof(uint32_t); @@ -730,7 +738,6 @@ inline size_t save_bin(const std::string &filename, T *data, size_t npts, size_t << std::endl; writer.write((char *)data, npts * ndims * sizeof(T)); - writer.close(); diskann::cout << "Finished writing bin." << std::endl; return bytes_written; } @@ -944,6 +951,16 @@ inline size_t save_data_in_base_dimensions(const std::string &filename, T *data, { std::ofstream writer; //(filename, std::ios::binary | std::ios::out); open_file_to_write(writer, filename); + size_t file_size = save_data_in_base_dimensions(writer, data, npts, ndims, aligned_dim, offset); + writer.close(); + + return file_size; +} + +template +inline size_t save_data_in_base_dimensions(std::ofstream &writer, T *data, size_t npts, size_t ndims, + size_t aligned_dim, size_t offset) +{ int npts_i32 = (int)npts, ndims_i32 = (int)ndims; size_t bytes_written = 2 * sizeof(uint32_t) + npts * ndims * sizeof(T); writer.seekp(offset, writer.beg); @@ -953,7 +970,6 @@ inline size_t save_data_in_base_dimensions(const std::string &filename, T *data, { writer.write((char *)(data + i * aligned_dim), ndims * sizeof(T)); } - writer.close(); return bytes_written; } @@ -968,11 +984,12 @@ inline void copy_aligned_data_from_file(const char *bin_file, T *&data, size_t & throw diskann::ANNException("Null pointer 
passed to copy_aligned_data_from_file function", -1, __FUNCSIG__, __FILE__, __LINE__); } + std::ifstream reader; reader.exceptions(std::ios::badbit | std::ios::failbit); reader.open(bin_file, std::ios::binary); - reader.seekg(offset, reader.beg); + reader.seekg(offset, reader.beg); int npts_i32, dim_i32; reader.read((char *)&npts_i32, sizeof(int)); reader.read((char *)&dim_i32, sizeof(int)); diff --git a/src/distance.cpp b/src/distance.cpp index 31ab9d3ff..f1c1a317a 100644 --- a/src/distance.cpp +++ b/src/distance.cpp @@ -730,4 +730,7 @@ template DISKANN_DLLEXPORT class SlowDistanceL2; template DISKANN_DLLEXPORT class SlowDistanceL2; template DISKANN_DLLEXPORT class SlowDistanceL2; +template DISKANN_DLLEXPORT Distance *get_distance_function(Metric m); +template DISKANN_DLLEXPORT Distance *get_distance_function(Metric m); +template DISKANN_DLLEXPORT Distance *get_distance_function(Metric m); } // namespace diskann diff --git a/src/in_mem_data_store.cpp b/src/in_mem_data_store.cpp index 7d02bba17..8e842d159 100644 --- a/src/in_mem_data_store.cpp +++ b/src/in_mem_data_store.cpp @@ -37,13 +37,13 @@ template size_t InMemDataStore::get_alignment_factor() return _distance_fn->get_required_alignment(); } -template location_t InMemDataStore::load(const std::string &filename) +template location_t InMemDataStore::load(const std::string &filename, size_t offset) { - return load_impl(filename); + return load_impl(filename, offset); } #ifdef EXEC_ENV_OLS -template location_t InMemDataStore::load_impl(AlignedFileReader &reader) +template location_t InMemDataStore::load_impl(AlignedFileReader &reader, size_t offset) { size_t file_dim, file_num_points; @@ -69,7 +69,7 @@ template location_t InMemDataStore::load_impl(AlignedF } #endif -template location_t InMemDataStore::load_impl(const std::string &filename) +template location_t InMemDataStore::load_impl(const std::string &filename, size_t offset) { size_t file_dim, file_num_points; if (!file_exists(filename)) @@ -80,7 +80,7 @@ 
template location_t InMemDataStore::load_impl(const st aligned_free(_data); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } - diskann::get_bin_metadata(filename, file_num_points, file_dim); + diskann::get_bin_metadata(filename, file_num_points, file_dim, offset); if (file_dim != this->_dim) { @@ -97,14 +97,20 @@ template location_t InMemDataStore::load_impl(const st this->resize((location_t)file_num_points); } - copy_aligned_data_from_file(filename.c_str(), _data, file_num_points, file_dim, _aligned_dim); + copy_aligned_data_from_file(filename.c_str(), _data, file_num_points, file_dim, _aligned_dim, offset); return (location_t)file_num_points; } -template size_t InMemDataStore::save(const std::string &filename, const location_t num_points) +template size_t InMemDataStore::save(const std::string &filename, const location_t num_pts) { - return save_data_in_base_dimensions(filename, _data, num_points, this->get_dims(), this->get_aligned_dim(), 0U); + return save_data_in_base_dimensions(filename, _data, num_pts, this->get_dims(), this->get_aligned_dim(), 0U); +} + +template +size_t InMemDataStore::save(std::ofstream &writer, const location_t num_pts, size_t offset) +{ + return save_data_in_base_dimensions(writer, _data, num_pts, this->get_dims(), this->get_aligned_dim(), offset); } template void InMemDataStore::populate_data(const data_t *vectors, const location_t num_pts) diff --git a/src/in_mem_graph_store.cpp b/src/in_mem_graph_store.cpp index c12b2514e..2d0150aed 100644 --- a/src/in_mem_graph_store.cpp +++ b/src/in_mem_graph_store.cpp @@ -17,15 +17,27 @@ InMemGraphStore::InMemGraphStore(const size_t total_pts, const size_t reserve_gr } std::tuple InMemGraphStore::load(const std::string &index_path_prefix, - const size_t num_points) + const size_t num_points, size_t offset) { - return load_impl(index_path_prefix, num_points); + return load_impl(index_path_prefix, num_points, offset); } int InMemGraphStore::store(const std::string 
&index_path_prefix, const size_t num_points, const size_t num_frozen_points, const uint32_t start) { - return save_graph(index_path_prefix, num_points, num_frozen_points, start); + std::ofstream writer; + open_file_to_write(writer, index_path_prefix); + int file_size = store(writer, num_points, num_frozen_points, start, 0U); + writer.close(); + + return file_size; +} + +int InMemGraphStore::store(std::ofstream &writer, const size_t num_points, + const size_t num_frozen_points, const uint32_t start, size_t offset) +{ + return save_graph(writer, num_points, num_frozen_points, start, offset); } + const std::vector &InMemGraphStore::get_neighbours(const location_t i) const { return _graph.at(i); @@ -71,7 +83,7 @@ void InMemGraphStore::clear_graph() } #ifdef EXEC_ENV_OLS -std::tuple InMemGraphStore::load_impl(AlignedFileReader &reader, size_t expected_num_points) +std::tuple InMemGraphStore::load_impl(AlignedFileReader &reader, size_t expected_num_points, size_t offset) { size_t expected_file_size; size_t file_frozen_pts; @@ -80,7 +92,7 @@ std::tuple InMemGraphStore::load_impl(AlignedFileRea auto max_points = get_max_points(); int header_size = 2 * sizeof(size_t) + 2 * sizeof(uint32_t); std::unique_ptr header = std::make_unique(header_size); - read_array(reader, header.get(), header_size); + read_array(reader, header.get(), header_size, offset); expected_file_size = *((size_t *)header.get()); _max_observed_degree = *((uint32_t *)(header.get() + sizeof(size_t))); @@ -103,7 +115,7 @@ std::tuple InMemGraphStore::load_impl(AlignedFileRea uint32_t nodes_read = 0; size_t cc = 0; - size_t graph_offset = header_size; + size_t graph_offset = header_size + offset; while (nodes_read < expected_num_points) { uint32_t k; @@ -133,17 +145,16 @@ std::tuple InMemGraphStore::load_impl(AlignedFileRea #endif std::tuple InMemGraphStore::load_impl(const std::string &filename, - size_t expected_num_points) + size_t expected_num_points, size_t offset) { size_t expected_file_size; size_t 
file_frozen_pts; uint32_t start; - size_t file_offset = 0; // will need this for single file format support std::ifstream in; in.exceptions(std::ios::badbit | std::ios::failbit); in.open(filename, std::ios::binary); - in.seekg(file_offset, in.beg); + in.seekg(offset, in.beg); in.read((char *)&expected_file_size, sizeof(size_t)); in.read((char *)&_max_observed_degree, sizeof(uint32_t)); in.read((char *)&start, sizeof(uint32_t)); @@ -197,35 +208,32 @@ std::tuple InMemGraphStore::load_impl(const std::str return std::make_tuple(nodes_read, start, file_frozen_pts); } -int InMemGraphStore::save_graph(const std::string &index_path_prefix, const size_t num_points, - const size_t num_frozen_points, const uint32_t start) +int InMemGraphStore::save_graph(std::ofstream &writer, const size_t num_points, + const size_t num_frozen_points, const uint32_t start, size_t offset) { - std::ofstream out; - open_file_to_write(out, index_path_prefix); - - size_t file_offset = 0; - out.seekp(file_offset, out.beg); + writer.seekp(offset, writer.beg); size_t index_size = 24; uint32_t max_degree = 0; - out.write((char *)&index_size, sizeof(uint64_t)); - out.write((char *)&_max_observed_degree, sizeof(uint32_t)); + writer.write((char *)&index_size, sizeof(uint64_t)); + writer.write((char *)&_max_observed_degree, sizeof(uint32_t)); uint32_t ep_u32 = start; - out.write((char *)&ep_u32, sizeof(uint32_t)); - out.write((char *)&num_frozen_points, sizeof(size_t)); + writer.write((char *)&ep_u32, sizeof(uint32_t)); + writer.write((char *)&num_frozen_points, sizeof(size_t)); // Note: num_points = _nd + _num_frozen_points for (uint32_t i = 0; i < num_points; i++) { uint32_t GK = (uint32_t)_graph[i].size(); - out.write((char *)&GK, sizeof(uint32_t)); - out.write((char *)_graph[i].data(), GK * sizeof(uint32_t)); + writer.write((char *)&GK, sizeof(uint32_t)); + writer.write((char *)_graph[i].data(), GK * sizeof(uint32_t)); max_degree = _graph[i].size() > max_degree ? 
(uint32_t)_graph[i].size() : max_degree; index_size += (size_t)(sizeof(uint32_t) * (GK + 1)); } - out.seekp(file_offset, out.beg); - out.write((char *)&index_size, sizeof(uint64_t)); - out.write((char *)&max_degree, sizeof(uint32_t)); - out.close(); + + writer.seekp(offset, writer.beg); + writer.write((char *)&index_size, sizeof(uint64_t)); + writer.write((char *)&max_degree, sizeof(uint32_t)); + return (int)index_size; } diff --git a/src/index.cpp b/src/index.cpp index 3de3a3b7f..a8cd16211 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -34,8 +34,8 @@ Index::Index(const IndexConfig &index_config, std::unique_ptr), _conc_consolidate(index_config.concurrent_consolidate) + _filtered_index(index_config.filtered_index),_save_as_one_file(index_config.save_as_one_file), _save_as_one_file_version(index_config.save_as_one_file_version), + _num_pq_chunks(index_config.num_pq_chunks), _delete_set(new tsl::robin_set), _conc_consolidate(index_config.concurrent_consolidate) { if (_dynamic_index && !_enable_tags) { @@ -125,7 +125,7 @@ Index::Index(Metric m, const size_t dim, const size_t max_point const std::shared_ptr index_search_params, const size_t num_frozen_pts, const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq, - const bool filtered_index) + const bool filtered_index, bool save_as_one_file, uint64_t save_as_one_file_version) : Index(IndexConfigBuilder() .with_metric(m) .with_dimension(dim) @@ -141,6 +141,8 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .is_use_opq(use_opq) .is_filtered(filtered_index) .with_data_type(diskann_type_to_name()) + .with_save_as_single_file(save_as_one_file) + .with_save_as_single_file_version(save_as_one_file_version) .build(), IndexFactory::construct_datastore( DataStoreStrategy::MEMORY, @@ -379,9 +381,100 @@ void Index::save(const char *filename, bool compact_before_save } else { - diskann::cout << "Save index in a 
single file currently not supported. " - "Not saving the index." - << std::endl; + if (_filtered_index) + { + diskann::cout << "Save index in a single file currently not supported for filtered index. " + "Not saving the index." + << std::endl; + } + else + { + if (_save_as_one_file_version == 1) + { + std::ofstream writer; + open_file_to_write(writer, filename); + + // Save version. + writer.write((char *)&_save_as_one_file_version, sizeof(uint64_t)); + size_t curr_pos = sizeof(uint64_t); + + // Placeholder for metadata. + // This will be filled at end; + SaveLoadMetaDataV1 metadata; + const size_t meta_data_start = curr_pos; + curr_pos += sizeof(SaveLoadMetaDataV1); + + // Save data. + metadata.data_offset = static_cast(curr_pos); + curr_pos += _data_store->save(writer, (location_t)(_nd + _num_frozen_pts), curr_pos); + + // Save delete list. + { + if (_delete_set->size() == 0) + { + metadata.delete_list_offset = static_cast(curr_pos); + } + else + { + std::unique_ptr delete_list = std::make_unique(_delete_set->size()); + uint32_t i = 0; + for (auto &del : *_delete_set) + { + delete_list[i++] = del; + } + curr_pos += save_bin(writer, delete_list.get(), _delete_set->size(), 1, curr_pos); + } + } + + // Save tags. + { + if (!_enable_tags) + { + diskann::cout << "Not saving tags as they are not enabled." << std::endl; + metadata.tags_offset = static_cast(curr_pos); + } + else + { + TagT *tag_data = new TagT[_nd + _num_frozen_pts]; + for (uint32_t i = 0; i < _nd; i++) + { + TagT tag; + if (_location_to_tag.try_get(i, tag)) + { + tag_data[i] = tag; + } + else + { + // catering to future when tagT can be any type. + std::memset((char *)&tag_data[i], 0, sizeof(TagT)); + } + } + if (_num_frozen_pts > 0) + { + std::memset((char *)&tag_data[_start], 0, sizeof(TagT) * _num_frozen_pts); + } + + curr_pos += save_bin(writer, tag_data, _nd + _num_frozen_pts, 1, curr_pos); + delete[] tag_data; + } + } + + // Save graph. 
+ metadata.graph_offset = static_cast(curr_pos); + curr_pos += _graph_store->store(writer, _nd + _num_frozen_pts, _num_frozen_pts, _start, curr_pos); + + // Save metadata. + writer.seekp(meta_data_start, writer.beg); + writer.write((char *)&metadata, sizeof(SaveLoadMetaDataV1)); + writer.close(); + } + else + { + diskann::cout << "Save index in a single file currently only support _save_as_one_file_version = 1. " + "Not saving the index." + << std::endl; + } + } } // If frozen points were temporarily compacted to _nd, move back to @@ -393,16 +486,16 @@ void Index::save(const char *filename, bool compact_before_save #ifdef EXEC_ENV_OLS template -size_t Index::load_tags(AlignedFileReader &reader) +size_t Index::load_tags(AlignedFileReader &reader, size_t offset) { #else template -size_t Index::load_tags(const std::string tag_filename) +size_t Index::load_tags(const std::string &filename, size_t offset) { - if (_enable_tags && !file_exists(tag_filename)) + if (_enable_tags && !file_exists(filename)) { - diskann::cerr << "Tag file " << tag_filename << " does not exist!" << std::endl; - throw diskann::ANNException("Tag file " + tag_filename + " does not exist!", -1, __FUNCSIG__, __FILE__, + diskann::cerr << "Tag file " << filename << " does not exist!" 
<< std::endl; + throw diskann::ANNException("Tag file " + filename + " does not exist!", -1, __FUNCSIG__, __FILE__, __LINE__); } #endif @@ -415,9 +508,9 @@ size_t Index::load_tags(const std::string tag_filename) size_t file_dim, file_num_points; TagT *tag_data; #ifdef EXEC_ENV_OLS - load_bin(reader, tag_data, file_num_points, file_dim); + load_bin(reader, tag_data, file_num_points, file_dim, offset); #else - load_bin(std::string(tag_filename), tag_data, file_num_points, file_dim); + load_bin(std::string(filename), tag_data, file_num_points, file_dim, offset); #endif if (file_dim != 1) @@ -449,15 +542,15 @@ size_t Index::load_tags(const std::string tag_filename) template #ifdef EXEC_ENV_OLS -size_t Index::load_data(AlignedFileReader &reader) +size_t Index::load_data(AlignedFileReader &reader, size_t offset) { #else -size_t Index::load_data(std::string filename) +size_t Index::load_data(std::string filename, size_t offset) { #endif size_t file_dim, file_num_points; #ifdef EXEC_ENV_OLS - diskann::get_bin_metadata(reader, file_num_points, file_dim); + diskann::get_bin_metadata(reader, file_num_points, file_dim, offset); #else if (!file_exists(filename)) { @@ -466,7 +559,7 @@ size_t Index::load_data(std::string filename) diskann::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } - diskann::get_bin_metadata(filename, file_num_points, file_dim); + diskann::get_bin_metadata(filename, file_num_points, file_dim, offset); #endif // since we are loading a new dataset, _empty_slots must be cleared @@ -490,29 +583,29 @@ size_t Index::load_data(std::string filename) #ifdef EXEC_ENV_OLS // REFACTOR TODO: Must figure out how to support aligned reader in a clean // manner. 
- copy_aligned_data_from_file(reader, _data, file_num_points, file_dim, _data_store->get_aligned_dim()); + copy_aligned_data_from_file(reader, _data, file_num_points, file_dim, _data_store->get_aligned_dim(), offset); #else - _data_store->load(filename); // offset == 0. + _data_store->load(filename, offset); // offset == 0. #endif return file_num_points; } #ifdef EXEC_ENV_OLS template -size_t Index::load_delete_set(AlignedFileReader &reader) +size_t Index::load_delete_set(AlignedFileReader &reader, size_t offset) { #else template -size_t Index::load_delete_set(const std::string &filename) +size_t Index::load_delete_set(const std::string &filename, size_t offset) { #endif std::unique_ptr delete_list; size_t npts, ndim; #ifdef EXEC_ENV_OLS - diskann::load_bin(reader, delete_list, npts, ndim); + diskann::load_bin(reader, delete_list, npts, ndim, offset); #else - diskann::load_bin(filename, delete_list, npts, ndim); + diskann::load_bin(filename, delete_list, npts, ndim, offset); #endif assert(ndim == 1); for (uint32_t i = 0; i < npts; i++) @@ -528,6 +621,7 @@ template #ifdef EXEC_ENV_OLS void Index::load(AlignedFileReader &reader, uint32_t num_threads, uint32_t search_l) { + IOContext &ctx = reader.get_ctx(); #else void Index::load(const char *filename, uint32_t num_threads, uint32_t search_l) { @@ -569,9 +663,95 @@ void Index::load(const char *filename, uint32_t num_threads, ui } else { - diskann::cout << "Single index file saving/loading support not yet " - "enabled. Not loading the index." - << std::endl; + if (_filtered_index) + { + diskann::cout << "Single index file saving/loading support for filtered index is not yet " + "enabled. Not loading the index." 
+ << std::endl; + } + else + { + uint64_t version; + +#ifdef EXEC_ENV_OLS + std::vector readReqs; + AlignedRead readReq; + uint64_t buf[1]; + + readReq.buf = buf; + readReq.offset = 0; + readReq.len = sizeof(uint64_t); + readReqs.push_back(readReq); + reader.read(readReqs, ctx); // synchronous + if ((*(ctx.m_pRequestsStatus))[0] == IOContext::READ_SUCCESS) + { + version = buf[0]; + } +#else + std::ifstream reader(filename, std::ios::binary); + reader.read((char *)&version, sizeof(uint64_t)); +#endif + + if (version == _save_as_one_file_version) + { + SaveLoadMetaDataV1 metadata; + +#ifdef EXEC_ENV_OLS + std::vector metadata_readReqs; + AlignedRead metadata_readReq; + uint64_t metadata_buf[1]; + + metadata_readReq.buf = metadata_buf; + metadata_readReq.offset = sizeof(uint64_t); + metadata_readReq.len = sizeof(SaveLoadMetaDataV1); + metadata_readReq.push_back(readReq); + reader.read(metadata_readReqs, ctx); // synchronous + if ((*(ctx.m_pRequestsStatus))[0] == IOContext::READ_SUCCESS) + { + memcpy((void *)&metadata, (void *)buf, sizeof(SaveLoadMetaDataV1)); + } +#else + reader.read((char *)&metadata, sizeof(SaveLoadMetaDataV1)); +#endif + // Load data +#ifdef EXEC_ENV_OLS + load_data(reader, metadata.data_offset) +#else + load_data(filename, metadata.data_offset); +#endif + + // Load delete list when presents. + if (metadata.data_offset != metadata.delete_list_offset) + { +#ifdef EXEC_ENV_OLS + load_delete_set(reader, metadata.delete_list_offset); +#else + load_delete_set(filename, metadata.delete_list_offset); +#endif + } + // Load tags when presents. 
+ if (metadata.delete_list_offset != metadata.tags_offset) + { +#ifdef EXEC_ENV_OLS + load_tags(reader, metadata.tags_offset); +#else + load_tags(filename, metadata.tags_offset); +#endif + } + // Load graph +#ifdef EXEC_ENV_OLS + load_graph(reader, metadata.graph_offset); +#else + load_graph(filename, metadata.graph_offset); +#endif + } + else + { + diskann::cout << "load index from a single file currently only support _save_as_one_file_version = 1. " + "Not loading the index." + << std::endl; + } + } return; } @@ -679,15 +859,15 @@ size_t Index::get_graph_num_frozen_points(const std::string &gr #ifdef EXEC_ENV_OLS template -size_t Index::load_graph(AlignedFileReader &reader, size_t expected_num_points) +size_t Index::load_graph(AlignedFileReader &reader, size_t expected_num_points, size_t offset) { #else template -size_t Index::load_graph(std::string filename, size_t expected_num_points) +size_t Index::load_graph(std::string filename, size_t expected_num_points, size_t offset) { #endif - auto res = _graph_store->load(filename, expected_num_points); + auto res = _graph_store->load(filename, expected_num_points, offset); _start = std::get<1>(res); _num_frozen_pts = std::get<2>(res); return std::get<0>(res); From d63ddfeb7d0fd91d1caebf299a4d8e54dfe7210a Mon Sep 17 00:00:00 2001 From: "REDMOND\\ninchen" Date: Fri, 10 Nov 2023 16:27:23 -0800 Subject: [PATCH 2/6] Clang-formatting files.
--- include/abstract_graph_store.h | 8 ++++---- include/in_mem_graph_store.h | 10 ++++++---- include/index.h | 2 +- include/index_config.h | 9 +++++---- include/utils.h | 3 +-- src/in_mem_graph_store.cpp | 11 ++++++----- src/index.cpp | 12 ++++++------ 7 files changed, 29 insertions(+), 26 deletions(-) diff --git a/include/abstract_graph_store.h b/include/abstract_graph_store.h index 110fbc11d..750fec727 100644 --- a/include/abstract_graph_store.h +++ b/include/abstract_graph_store.h @@ -21,13 +21,13 @@ class AbstractGraphStore virtual ~AbstractGraphStore() = default; // returns tuple of - virtual std::tuple load(const std::string &index_path_prefix, - const size_t num_points, size_t offset) = 0; + virtual std::tuple load(const std::string &index_path_prefix, const size_t num_points, + size_t offset) = 0; virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_fz_points, const uint32_t start) = 0; - virtual int store(std::ofstream& writer, const size_t num_points, const size_t num_fz_points, - const uint32_t start, size_t offset) = 0; + virtual int store(std::ofstream &writer, const size_t num_points, const size_t num_fz_points, const uint32_t start, + size_t offset) = 0; // not synchronised, user should use lock when necvessary. 
virtual const std::vector &get_neighbours(const location_t i) const = 0; diff --git a/include/in_mem_graph_store.h b/include/in_mem_graph_store.h index 0c24e1703..95e4dbcce 100644 --- a/include/in_mem_graph_store.h +++ b/include/in_mem_graph_store.h @@ -14,8 +14,8 @@ class InMemGraphStore : public AbstractGraphStore InMemGraphStore(const size_t total_pts, const size_t reserve_graph_degree); // returns tuple of - virtual std::tuple load(const std::string &index_path_prefix, - const size_t num_points, size_t offset) override; + virtual std::tuple load(const std::string &index_path_prefix, const size_t num_points, + size_t offset) override; virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_frozen_points, const uint32_t start) override; virtual int store(std::ofstream &writer, const size_t num_points, const size_t num_fz_points, const uint32_t start, @@ -34,9 +34,11 @@ class InMemGraphStore : public AbstractGraphStore virtual uint32_t get_max_observed_degree() override; protected: - virtual std::tuple load_impl(const std::string &filename, size_t expected_num_points, size_t offset); + virtual std::tuple load_impl(const std::string &filename, size_t expected_num_points, + size_t offset); #ifdef EXEC_ENV_OLS - virtual std::tuple load_impl(AlignedFileReader &reader, size_t expected_num_points, size_t offset); + virtual std::tuple load_impl(AlignedFileReader &reader, size_t expected_num_points, + size_t offset); #endif int save_graph(std::ofstream &writer, const size_t active_points, const size_t num_frozen_points, diff --git a/include/index.h b/include/index.h index e5af15599..60bf7aaf6 100644 --- a/include/index.h +++ b/include/index.h @@ -369,7 +369,7 @@ template clas bool _has_built = false; bool _saturate_graph = false; - bool _save_as_one_file; // plan to support filtered index in next version. + bool _save_as_one_file; // plan to support filtered index in next version. 
uint64_t _save_as_one_file_version; // Version used for save index as single file. bool _dynamic_index = false; bool _enable_tags = false; diff --git a/include/index_config.h b/include/index_config.h index 8b8f7e1f8..b527a917c 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -46,8 +46,9 @@ struct IndexConfig private: IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension, size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags, - bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index, bool save_as_one_file, - uint64_t save_as_one_file_version, std::string &data_type, const std::string &tag_type, const std::string &label_type, + bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index, + bool save_as_one_file, uint64_t save_as_one_file_version, std::string &data_type, + const std::string &tag_type, const std::string &label_type, std::shared_ptr index_write_params, std::shared_ptr index_search_params) : data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension), @@ -234,8 +235,8 @@ class IndexConfigBuilder return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, _num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate, - _use_opq, _filtered_index, _save_as_one_file, _save_as_one_file_version, _data_type, _tag_type, - _label_type, _index_write_params, _index_search_params); + _use_opq, _filtered_index, _save_as_one_file, _save_as_one_file_version, _data_type, + _tag_type, _label_type, _index_write_params, _index_search_params); } IndexConfigBuilder(const IndexConfigBuilder &) = delete; diff --git a/include/utils.h b/include/utils.h index 4fd0dd3c1..a70ddd43d 100644 --- a/include/utils.h +++ b/include/utils.h @@ -726,8 +726,7 @@ inline size_t save_bin(const std::string &filename, T 
*data, size_t npts, size_t return bytes_written; } -template -inline size_t save_bin(std::ofstream &writer, T *data, size_t npts, size_t ndims, size_t offset) +template inline size_t save_bin(std::ofstream &writer, T *data, size_t npts, size_t ndims, size_t offset) { writer.seekp(offset, writer.beg); int npts_i32 = (int)npts, ndims_i32 = (int)ndims; diff --git a/src/in_mem_graph_store.cpp b/src/in_mem_graph_store.cpp index 2d0150aed..fe14c8a0d 100644 --- a/src/in_mem_graph_store.cpp +++ b/src/in_mem_graph_store.cpp @@ -32,8 +32,8 @@ int InMemGraphStore::store(const std::string &index_path_prefix, const size_t nu return file_size; } -int InMemGraphStore::store(std::ofstream &writer, const size_t num_points, - const size_t num_frozen_points, const uint32_t start, size_t offset) +int InMemGraphStore::store(std::ofstream &writer, const size_t num_points, const size_t num_frozen_points, + const uint32_t start, size_t offset) { return save_graph(writer, num_points, num_frozen_points, start, offset); } @@ -83,7 +83,8 @@ void InMemGraphStore::clear_graph() } #ifdef EXEC_ENV_OLS -std::tuple InMemGraphStore::load_impl(AlignedFileReader &reader, size_t expected_num_points, size_t offset) +std::tuple InMemGraphStore::load_impl(AlignedFileReader &reader, size_t expected_num_points, + size_t offset) { size_t expected_file_size; size_t file_frozen_pts; @@ -208,8 +209,8 @@ std::tuple InMemGraphStore::load_impl(const std::str return std::make_tuple(nodes_read, start, file_frozen_pts); } -int InMemGraphStore::save_graph(std::ofstream &writer, const size_t num_points, - const size_t num_frozen_points, const uint32_t start, size_t offset) +int InMemGraphStore::save_graph(std::ofstream &writer, const size_t num_points, const size_t num_frozen_points, + const uint32_t start, size_t offset) { writer.seekp(offset, writer.beg); size_t index_size = 24; diff --git a/src/index.cpp b/src/index.cpp index a8cd16211..b466199db 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -34,8 +34,9 @@ 
Index::Index(const IndexConfig &index_config, std::unique_ptr), _conc_consolidate(index_config.concurrent_consolidate) + _filtered_index(index_config.filtered_index), _save_as_one_file(index_config.save_as_one_file), + _save_as_one_file_version(index_config.save_as_one_file_version), _num_pq_chunks(index_config.num_pq_chunks), + _delete_set(new tsl::robin_set), _conc_consolidate(index_config.concurrent_consolidate) { if (_dynamic_index && !_enable_tags) { @@ -495,8 +496,7 @@ size_t Index::load_tags(const std::string &filename, size_t off if (_enable_tags && !file_exists(filename)) { diskann::cerr << "Tag file " << filename << " does not exist!" << std::endl; - throw diskann::ANNException("Tag file " + filename + " does not exist!", -1, __FUNCSIG__, __FILE__, - __LINE__); + throw diskann::ANNException("Tag file " + filename + " does not exist!", -1, __FUNCSIG__, __FILE__, __LINE__); } #endif if (!_enable_tags) @@ -720,8 +720,8 @@ void Index::load(const char *filename, uint32_t num_threads, ui load_data(filename, metadata.data_offset); #endif - // Load delete list when presents. - if (metadata.data_offset != metadata.delete_list_offset) + // Load delete list when presents. + if (metadata.data_offset != metadata.delete_list_offset) { #ifdef EXEC_ENV_OLS load_delete_set(reader, metadata.delete_list_offset); From 09c198abe1aeea47bb9cbea87ea0d26cf6cfa912 Mon Sep 17 00:00:00 2001 From: "REDMOND\\ninchen" Date: Mon, 13 Nov 2023 14:48:14 -0800 Subject: [PATCH 3/6] Compiling fix for Ubuntu. 
--- include/utils.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/include/utils.h b/include/utils.h index a70ddd43d..2c6c6681b 100644 --- a/include/utils.h +++ b/include/utils.h @@ -944,17 +944,6 @@ template void save_Tvecs(const char *filename, T *data, size_t npts writer.write((char *)cur_pt, ndims * sizeof(T)); } } -template -inline size_t save_data_in_base_dimensions(const std::string &filename, T *data, size_t npts, size_t ndims, - size_t aligned_dim, size_t offset = 0) -{ - std::ofstream writer; //(filename, std::ios::binary | std::ios::out); - open_file_to_write(writer, filename); - size_t file_size = save_data_in_base_dimensions(writer, data, npts, ndims, aligned_dim, offset); - writer.close(); - - return file_size; -} template inline size_t save_data_in_base_dimensions(std::ofstream &writer, T *data, size_t npts, size_t ndims, @@ -972,6 +961,18 @@ inline size_t save_data_in_base_dimensions(std::ofstream &writer, T *data, size_ return bytes_written; } +template +inline size_t save_data_in_base_dimensions(const std::string &filename, T *data, size_t npts, size_t ndims, + size_t aligned_dim, size_t offset = 0) +{ + std::ofstream writer; //(filename, std::ios::binary | std::ios::out); + open_file_to_write(writer, filename); + size_t file_size = save_data_in_base_dimensions(writer, data, npts, ndims, aligned_dim, offset); + writer.close(); + + return file_size; +} + template inline void copy_aligned_data_from_file(const char *bin_file, T *&data, size_t &npts, size_t &dim, const size_t &rounded_dim, size_t offset = 0) From e5f4a68505b83468bdbbf21851eb6a93fbca25a8 Mon Sep 17 00:00:00 2001 From: "REDMOND\\ninchen" Date: Mon, 13 Nov 2023 22:01:09 -0800 Subject: [PATCH 4/6] Fix Ubuntu Compiling. 
--- include/utils.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/utils.h b/include/utils.h index 2c6c6681b..9011634f5 100644 --- a/include/utils.h +++ b/include/utils.h @@ -714,18 +714,6 @@ inline void open_file_to_write(std::ofstream &writer, const std::string &filenam } } -template -inline size_t save_bin(const std::string &filename, T *data, size_t npts, size_t ndims, size_t offset = 0) -{ - std::ofstream writer; - open_file_to_write(writer, filename); - diskann::cout << "Writing bin file: " << filename.c_str() << std::endl; - size_t bytes_written = save_bin(writer, data, npts, ndims, offset); - writer.close(); - diskann::cout << "Close file " << filename << "." << std::endl; - return bytes_written; -} - template inline size_t save_bin(std::ofstream &writer, T *data, size_t npts, size_t ndims, size_t offset) { writer.seekp(offset, writer.beg); @@ -741,6 +729,18 @@ template inline size_t save_bin(std::ofstream &writer, T *data, siz return bytes_written; } +template +inline size_t save_bin(const std::string &filename, T *data, size_t npts, size_t ndims, size_t offset = 0) +{ + std::ofstream writer; + open_file_to_write(writer, filename); + diskann::cout << "Writing bin file: " << filename.c_str() << std::endl; + size_t bytes_written = save_bin(writer, data, npts, ndims, offset); + writer.close(); + diskann::cout << "Close file " << filename << "." << std::endl; + return bytes_written; +} + inline void print_progress(double percentage) { int val = (int)(percentage * 100); From 8e809e2cbbdbfe5de45f4805c4f99cc440c99a9f Mon Sep 17 00:00:00 2001 From: "REDMOND\\ninchen" Date: Tue, 14 Nov 2023 16:47:58 -0800 Subject: [PATCH 5/6] Adding Load as one file flag and version. 
--- include/index.h | 7 +++++-- include/index_config.h | 28 +++++++++++++++++++++++----- src/index.cpp | 13 +++++++++---- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/include/index.h b/include/index.h index 60bf7aaf6..7eeff0b84 100644 --- a/include/index.h +++ b/include/index.h @@ -66,7 +66,8 @@ template clas const bool enable_tags = false, const bool concurrent_consolidate = false, const bool pq_dist_build = false, const size_t num_pq_chunks = 0, const bool use_opq = false, const bool filtered_index = false, - bool save_as_one_file = false, uint64_t save_as_one_file_version = 1); + bool save_as_one_file = false, uint64_t save_as_one_file_version = 1, + bool load_from_one_file = false, uint64_t load_from_one_file_version = 1); DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store, std::unique_ptr graph_store); @@ -370,7 +371,9 @@ template clas bool _has_built = false; bool _saturate_graph = false; bool _save_as_one_file; // plan to support filtered index in next version. - uint64_t _save_as_one_file_version; // Version used for save index as single file. + uint64_t _save_as_one_file_version; // Version used for save index to single file. + bool _load_from_one_file; // Whether to load index from single file. + uint64_t _load_from_one_file_version; // Version used for save index to single file. bool _dynamic_index = false; bool _enable_tags = false; bool _normalize_vecs = false; // Using normalied L2 for cosine. 
diff --git a/include/index_config.h b/include/index_config.h index b527a917c..6ada17d07 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -30,6 +30,8 @@ struct IndexConfig bool filtered_index; bool save_as_one_file; uint64_t save_as_one_file_version; + bool load_from_one_file; + uint64_t load_from_one_file_version; size_t num_pq_chunks; size_t num_frozen_pts; @@ -47,14 +49,15 @@ struct IndexConfig IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension, size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags, bool pq_dist_build, bool concurrent_consolidate, bool use_opq, bool filtered_index, - bool save_as_one_file, uint64_t save_as_one_file_version, std::string &data_type, - const std::string &tag_type, const std::string &label_type, - std::shared_ptr index_write_params, + bool save_as_one_file, uint64_t save_as_one_file_version, bool load_from_one_file, + uint64_t load_from_one_file_version, std::string &data_type, const std::string &tag_type, + const std::string &label_type, std::shared_ptr index_write_params, std::shared_ptr index_search_params) : data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension), max_points(max_points), dynamic_index(dynamic_index), enable_tags(enable_tags), pq_dist_build(pq_dist_build), concurrent_consolidate(concurrent_consolidate), use_opq(use_opq), filtered_index(filtered_index), save_as_one_file(save_as_one_file), save_as_one_file_version(save_as_one_file_version), + load_from_one_file(load_from_one_file), load_from_one_file_version(load_from_one_file_version), num_pq_chunks(num_pq_chunks), num_frozen_pts(num_frozen_points), label_type(label_type), tag_type(tag_type), data_type(data_type), index_write_params(index_write_params), index_search_params(index_search_params) { @@ -210,6 +213,18 @@ class IndexConfigBuilder return *this; } + IndexConfigBuilder 
&with_load_from_single_file(bool load_from_one_file) + { + this->_load_from_one_file = load_from_one_file; + return *this; + } + + IndexConfigBuilder &with_load_from_single_file_version(uint64_t load_from_one_file_version) + { + this->_load_from_one_file_version = load_from_one_file_version; + return *this; + } + IndexConfig build() { if (_data_type == "" || _data_type.empty()) @@ -235,8 +250,9 @@ class IndexConfigBuilder return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, _num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate, - _use_opq, _filtered_index, _save_as_one_file, _save_as_one_file_version, _data_type, - _tag_type, _label_type, _index_write_params, _index_search_params); + _use_opq, _filtered_index, _save_as_one_file, _save_as_one_file_version, _load_from_one_file, + _load_from_one_file_version, _data_type, _tag_type, _label_type, _index_write_params, + _index_search_params); } IndexConfigBuilder(const IndexConfigBuilder &) = delete; @@ -258,6 +274,8 @@ class IndexConfigBuilder bool _filtered_index{defaults::HAS_LABELS}; bool _save_as_one_file; uint64_t _save_as_one_file_version; + bool _load_from_one_file = false; + uint64_t _load_from_one_file_version = 1; size_t _num_pq_chunks = 0; size_t _num_frozen_pts{defaults::NUM_FROZEN_POINTS_STATIC}; diff --git a/src/index.cpp b/src/index.cpp index b466199db..10db4dab6 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -35,7 +35,9 @@ Index::Index(const IndexConfig &index_config, std::unique_ptr), _conc_consolidate(index_config.concurrent_consolidate) { if (_dynamic_index && !_enable_tags) @@ -126,7 +128,8 @@ Index::Index(Metric m, const size_t dim, const size_t max_point const std::shared_ptr index_search_params, const size_t num_frozen_pts, const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq, - const bool filtered_index, bool 
save_as_one_file, uint64_t save_as_one_file_version) + const bool filtered_index, bool save_as_one_file, uint64_t save_as_one_file_version, + bool load_from_one_file, uint64_t load_from_one_file_version) : Index(IndexConfigBuilder() .with_metric(m) .with_dimension(dim) @@ -144,6 +147,8 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .with_data_type(diskann_type_to_name()) .with_save_as_single_file(save_as_one_file) .with_save_as_single_file_version(save_as_one_file_version) + .with_load_from_single_file(load_from_one_file) + .with_load_from_single_file_version(load_from_one_file_version) .build(), IndexFactory::construct_datastore( DataStoreStrategy::MEMORY, @@ -640,7 +645,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui std::string labels_to_medoids = mem_index_file + "_labels_to_medoids.txt"; std::string labels_map_file = mem_index_file + "_labels_map.txt"; #endif - if (!_save_as_one_file) + if (!_load_from_one_file) { // For DLVS Store, we will not support saving the index in multiple // files. @@ -692,7 +697,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui reader.read((char *)&version, sizeof(uint64_t)); #endif - if (version == _save_as_one_file_version) + if (version == _load_from_one_file_version) { SaveLoadMetaDataV1 metadata; From 2315f6779cb7241abb6d28e279e882a28f0fadda Mon Sep 17 00:00:00 2001 From: "REDMOND\\ninchen" Date: Tue, 14 Nov 2023 16:49:24 -0800 Subject: [PATCH 6/6] Clang Formatting. --- include/index.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/index.h b/include/index.h index 7eeff0b84..387a9ac07 100644 --- a/include/index.h +++ b/include/index.h @@ -370,9 +370,9 @@ template clas bool _has_built = false; bool _saturate_graph = false; - bool _save_as_one_file; // plan to support filtered index in next version. - uint64_t _save_as_one_file_version; // Version used for save index to single file. 
- bool _load_from_one_file; // Whether to load index from single file. + bool _save_as_one_file; // plan to support filtered index in next version. + uint64_t _save_as_one_file_version; // Version used for save index to single file. + bool _load_from_one_file; // Whether to load index from single file. uint64_t _load_from_one_file_version; // Version used for save index to single file. bool _dynamic_index = false; bool _enable_tags = false;