diff --git a/src/core/field_extractor.h b/src/core/field_extractor.h index 1de14c338..764e874e1 100644 --- a/src/core/field_extractor.h +++ b/src/core/field_extractor.h @@ -56,7 +56,7 @@ class FieldExtractor { // fulltext index bool fulltext_indexed_ = false; // vector index - std::unique_ptr vector_index_; + std::shared_ptr vector_index_; public: FieldExtractor() : null_bit_off_(0), vertex_index_(nullptr), @@ -74,16 +74,7 @@ class FieldExtractor { vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr); edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr); fulltext_indexed_ = rhs.fulltext_indexed_; - if (rhs.vector_index_ != nullptr) { - if (rhs.vector_index_->GetIndexType() == "HNSW") { - vector_index_.reset(new HNSW( - dynamic_cast(*rhs.vector_index_))); - } else { - vector_index_.reset(nullptr); - } - } else { - vector_index_.reset(nullptr); - } + vector_index_ = rhs.vector_index_; } FieldExtractor& operator=(const FieldExtractor& rhs) { @@ -97,16 +88,7 @@ class FieldExtractor { vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr); edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr); fulltext_indexed_ = rhs.fulltext_indexed_; - if (rhs.vector_index_ != nullptr) { - if (rhs.vector_index_->GetIndexType() == "HNSW") { - vector_index_.reset(new HNSW( - dynamic_cast(*rhs.vector_index_))); - } else { - vector_index_.reset(nullptr); - } - } else { - vector_index_.reset(nullptr); - } + vector_index_ = rhs.vector_index_; return *this; } diff --git a/src/core/index_manager.cpp b/src/core/index_manager.cpp index 97cdc754c..99b0cd0c0 100644 --- a/src/core/index_manager.cpp +++ b/src/core/index_manager.cpp @@ -194,10 +194,8 @@ bool IndexManager::AddVectorIndex(KvTransaction& txn, const std::string& label, Value idxv; StoreVectorIndex(idx, idxv); it->AddKeyValue(Value::ConstRef(table_name), idxv); - if (index_type == "hnsw") { - vector_index = std::make_unique(label, field, distance_type, + vector_index = std::make_unique(label, field, distance_type, index_type, vec_dimension, index_spec); - } return true; } diff --git a/src/core/lightning_graph.cpp b/src/core/lightning_graph.cpp index 27ce0bcd3..98c8090a4 100644 --- a/src/core/lightning_graph.cpp +++ b/src/core/lightning_graph.cpp @@ -832,6 +832,8 @@ bool LightningGraph::AlterLabelDelFields(const std::string& label, // delete fulltext index index_manager_->DeleteFullTextIndex(txn.GetTxn(), is_vertex, label, extractor->Name()); + } else if (extractor->GetVectorIndex()) { + index_manager_->DeleteVectorIndex(txn.GetTxn(), label, extractor->Name()); } } auto composite_index_key = curr_schema->GetRelationalCompositeIndexKey(fids); diff --git a/src/core/schema.cpp b/src/core/schema.cpp index 8c6fbceaf..2fbbe219c 100644 --- a/src/core/schema.cpp +++ b/src/core/schema.cpp @@ -314,10 +314,9 @@ void Schema::AddEdgeToIndex(KvTransaction& txn, const EdgeUid& euid, const Value } void Schema::AddVectorToVectorIndex(KvTransaction& txn, VertexId vid, const Value& record) { - LOG_INFO() << "Schema::AddVectorToVectorIndex " << vector_index_fields_.size(); for (auto& idx : vector_index_fields_) { - LOG_INFO() << "Schema::AddVectorToVectorIndex-1"; auto& fe = fields_[idx]; + std::string a = fe.Name(); if (fe.GetIsNull(record)) continue; VectorIndex* index = fe.GetVectorIndex(); auto dim = index->GetVecDimension(); diff --git a/src/core/vsag_hnsw.cpp b/src/core/vsag_hnsw.cpp index 0422fae75..e20449f09 100644 --- a/src/core/vsag_hnsw.cpp +++ b/src/core/vsag_hnsw.cpp @@ -26,11 +26,6 @@ HNSW::HNSW(const std::string& label, const std::string& name, vec_dimension, std::move(index_spec)), createindex_(nullptr), index_(createindex_.get()) {} -HNSW::HNSW(const HNSW& rhs) - : VectorIndex(rhs), - createindex_(rhs.createindex_), - index_(createindex_.get()) {} - // add vector to index void HNSW::Add(const std::vector>& vectors, const std::vector& vids, int64_t num_vectors) { @@ -190,11 +185,9 @@ HNSW::Search(const std::vector& query, int64_t num_results, int ef_search nlohmann::json parameters{ {"hnsw", {{"ef_search", ef_search}}}, }; - LOG_INFO() << "index_->GetNumElements(): " << index_->GetNumElements(); std::vector> ret; auto result = index_->KnnSearch(dataset, num_results, parameters.dump()); if (result.has_value()) { - LOG_INFO() << "result.value()->GetDim():" << result.value()->GetDim(); for (int64_t i = 0; i < result.value()->GetDim(); ++i) { ret.emplace_back(result.value()->GetIds()[i], result.value()->GetDistances()[i]); } diff --git a/src/core/vsag_hnsw.h b/src/core/vsag_hnsw.h index e14b43554..834a52d20 100644 --- a/src/core/vsag_hnsw.h +++ b/src/core/vsag_hnsw.h @@ -39,7 +39,7 @@ class HNSW : public VectorIndex { const std::string& distance_type, const std::string& index_type, int vec_dimension, std::vector index_spec); - HNSW(const HNSW& rhs); + HNSW(const HNSW& rhs) = delete; HNSW(HNSW&& rhs) = delete; diff --git a/test/resource/unit_test/vector_index/cypher/vector_index.result b/test/resource/unit_test/vector_index/cypher/vector_index.result new file mode 100644 index 000000000..438ddb422 --- /dev/null +++ b/test/resource/unit_test/vector_index/cypher/vector_index.result @@ -0,0 +1,40 @@ +CALL db.createVertexLabelByJson('{"label":"person","primary":"id","type":"VERTEX","detach_property":true,"properties":[{"name":"id","type":"INT32","optional":false},{"name":"name","type":"STRING","optional":false,"index":false},{"name":"embedding1","type":"FLOAT_VECTOR","optional":false}, {"name":"embedding2","type":"FLOAT_VECTOR","optional":false}]}'); +[] +CALL db.addVertexVectorIndex('person','embedding1', {dimension:4}); +[] +CALL db.addVertexVectorIndex('person','embedding2', {dimension:4}); +[] +CALL db.addVertexVectorIndex('person','name', {dimension:4}); +[VectorIndexException] Only FLOAT_VECTOR type supports vector index +CALL db.showVertexVectorIndex(); +[{"dimension":4,"distance_type":"l2","field_name":"embedding1","hnsm.ef_construction":100,"hnsm.m":16,"index_type":"hnsw","label_name":"person"},{"dimension":4,"distance_type":"l2","field_name":"embedding2","hnsm.ef_construction":100,"hnsm.m":16,"index_type":"hnsw","label_name":"person"}] +CREATE (n:person {id:1, name:'name1', embedding1: [1.0,1.0,1.0,1.0], embedding2: [11.0,11.0,11.0,11.0]}); +[{"":"created 1 vertices, created 0 edges."}] +CREATE (n:person {id:2, name:'name2', embedding1: [2.0,2.0,2.0,2.0], embedding2: [12.0,12.0,12.0,12.0]}); +[{"":"created 1 vertices, created 0 edges."}] +CALL db.upsertVertex('person', [{id:3, name:'name3', embedding1: [3.0,3.0,3.0,3.0], embedding2: [13.0,13.0,13.0,13.0]}, {id:4, name:'name4', embedding1: [4.0,4.0,4.0,4.0], embedding2: [14.0,14.0,14.0,14.0]}]); +[{"data_error":0,"index_conflict":0,"insert":2,"total":2,"update":0}] +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}); +[{"node":{"identity":1,"label":"person","properties":{"embedding1":[2.0,2.0,2.0,2.0],"embedding2":[12.0,12.0,12.0,12.0],"id":2,"name":"name2"}},"score":6.0},{"node":{"identity":2,"label":"person","properties":{"embedding1":[3.0,3.0,3.0,3.0],"embedding2":[13.0,13.0,13.0,13.0],"id":3,"name":"name3"}},"score":6.0}] +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +[{"node.id":2},{"node.id":3}] +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:20, hnsw_ef_search:100}) yield node return node.id; +[{"node.id":2},{"node.id":3},{"node.id":1},{"node.id":4}] +CALL db.vertexVectorIndexQuery('person','embedding2',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +[{"node.id":1},{"node.id":2}] +CALL db.upsertVertex('person', [{id:1, embedding1: [33.0,33.0,33.0,33.0]}]); +[{"data_error":0,"index_conflict":0,"insert":0,"total":1,"update":1}] +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +[{"node.id":2},{"node.id":3}] +match(n:person {id:2}) delete n; +[{"":"deleted 1 vertices, deleted 0 edges."}] +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +[{"node.id":3},{"node.id":4}] +CALL db.alterLabelDelFields('vertex', 'person', ['embedding1']); +[{"record_affected":3}] +CALL db.showVertexVectorIndex(); +[{"dimension":4,"distance_type":"l2","field_name":"embedding2","hnsm.ef_construction":100,"hnsm.m":16,"index_type":"hnsw","label_name":"person"}] +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +[FieldNotFound] Field [embedding1] does not exist. +CALL db.vertexVectorIndexQuery('person','embedding2',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +[{"node.id":1},{"node.id":3}] diff --git a/test/resource/unit_test/vector_index/cypher/vector_index.test b/test/resource/unit_test/vector_index/cypher/vector_index.test new file mode 100644 index 000000000..22c4eb12a --- /dev/null +++ b/test/resource/unit_test/vector_index/cypher/vector_index.test @@ -0,0 +1,20 @@ +CALL db.createVertexLabelByJson('{"label":"person","primary":"id","type":"VERTEX","detach_property":true,"properties":[{"name":"id","type":"INT32","optional":false},{"name":"name","type":"STRING","optional":false,"index":false},{"name":"embedding1","type":"FLOAT_VECTOR","optional":false}, {"name":"embedding2","type":"FLOAT_VECTOR","optional":false}]}'); +CALL db.addVertexVectorIndex('person','embedding1', {dimension:4}); +CALL db.addVertexVectorIndex('person','embedding2', {dimension:4}); +CALL db.addVertexVectorIndex('person','name', {dimension:4}); +CALL db.showVertexVectorIndex(); +CREATE (n:person {id:1, name:'name1', embedding1: [1.0,1.0,1.0,1.0], embedding2: [11.0,11.0,11.0,11.0]}); +CREATE (n:person {id:2, name:'name2', embedding1: [2.0,2.0,2.0,2.0], embedding2: [12.0,12.0,12.0,12.0]}); +CALL db.upsertVertex('person', [{id:3, name:'name3', embedding1: [3.0,3.0,3.0,3.0], embedding2: [13.0,13.0,13.0,13.0]}, {id:4, name:'name4', embedding1: [4.0,4.0,4.0,4.0], embedding2: [14.0,14.0,14.0,14.0]}]); +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}); +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:20, hnsw_ef_search:100}) yield node return node.id; +CALL db.vertexVectorIndexQuery('person','embedding2',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +CALL db.upsertVertex('person', [{id:1, embedding1: [33.0,33.0,33.0,33.0]}]); +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +match(n:person {id:2}) delete n; +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +CALL db.alterLabelDelFields('vertex', 'person', ['embedding1']); +CALL db.showVertexVectorIndex(); +CALL db.vertexVectorIndexQuery('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; +CALL db.vertexVectorIndexQuery('person','embedding2',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id; \ No newline at end of file diff --git a/test/test_cypher_v2.cpp b/test/test_cypher_v2.cpp index d136585c5..d72a55939 100644 --- a/test/test_cypher_v2.cpp +++ b/test/test_cypher_v2.cpp @@ -596,3 +596,10 @@ TEST_F(TestCypherV2, TestEdgeIdQuery) { std::string dir = test_suite_dir_ + "/edge_id_query/cypher"; test_files(dir); } + +TEST_F(TestCypherV2, TestVectorIndex) { + set_graph_type(GraphFactory::GRAPH_DATASET_TYPE::EMPTY); + set_query_type(lgraph::ut::QUERY_TYPE::NEWCYPHER); + std::string dir = test_suite_dir_ + "/vector_index/cypher"; + test_files(dir); +} \ No newline at end of file diff --git a/test/test_vsag_index.cpp b/test/test_vsag_index.cpp index 1ae7b4dfa..5aa37840d 100644 --- a/test/test_vsag_index.cpp +++ b/test/test_vsag_index.cpp @@ -52,7 +52,7 @@ class TestVsag : public TuGraphTest { } } vector_index = - std::make_unique("label", "name", "L2", "HNSW", dim, index_spec); + std::make_unique("label", "name", "l2", "hnsw", dim, index_spec); } void TearDown() override {} }; @@ -79,7 +79,7 @@ TEST_F(TestVsag, SaveAndLoadIndex) { EXPECT_NO_THROW(vector_index->Add(vectors, vids, num_vectors)); std::vector serialized_index = vector_index->Save(); ASSERT_FALSE(serialized_index.empty()); - lgraph::HNSW vector_index_loaded("label", "name", "L2", "HNSW", dim, index_spec); + lgraph::HNSW vector_index_loaded("label", "name", "l2", "hnsw", dim, index_spec); ASSERT_TRUE(vector_index_loaded.Build()); vector_index_loaded.Load(serialized_index); std::vector query(vectors[0].begin(), vectors[0].end()); @@ -148,7 +148,7 @@ TEST_F(TestVsag, restart) { "'person','id','id','int64',false,'vector','float_vector',true)"); UT_EXPECT_TRUE(ret); ret = client.CallCypher( - str, "CALL db.AddVertexVectorIndex('person','vector', {dimension:4})"); + str, "CALL db.addVertexVectorIndex('person','vector', {dimension:4})"); UT_EXPECT_TRUE(ret); ret = client.CallCypher(str, "CREATE (n:person {id:1, vector: [1.0,1.0,1.0,1.0]})"); UT_EXPECT_TRUE(ret); @@ -157,8 +157,9 @@ TEST_F(TestVsag, restart) { ret = client.CallCypher(str, "CALL db.upsertVertex('person', [{id:3, vector: [3.0,3.0,3.0,3.0]}," "{id:4, vector: [4.0,4.0,4.0,4.0]}])"); + UT_EXPECT_TRUE(ret); ret = client.CallCypher(str,"CALL db.vertexVectorIndexQuery" //NOLINT - "('person','vector',[1,2,3,4], 4, 10) YIELD node RETURN node.id"); + "('person','vector',[1,2,3,4], {top_k:4, hnsw_ef_search:10}) YIELD node RETURN node.id"); UT_EXPECT_EQ(str, R"([{"node.id":2},{"node.id":3},{"node.id":1},{"node.id":4}])"); UT_EXPECT_TRUE(ret); server->Kill(); @@ -171,10 +172,25 @@ TEST_F(TestVsag, restart) { _detail::DEFAULT_ADMIN_NAME, _detail::DEFAULT_ADMIN_PASS); std::string str; auto ret = client.CallCypher(str, "CALL db.vertexVectorIndexQuery" - "('person','vector',[1,2,3,4], 4, 10) " + "('person','vector',[1,2,3,4], {top_k:4, hnsw_ef_search:10}) " "YIELD node RETURN node.id"); UT_EXPECT_EQ(str, R"([{"node.id":2},{"node.id":3},{"node.id":1},{"node.id":4}])"); UT_EXPECT_TRUE(ret); + ret = client.CallCypher(str, "CALL db.alterLabelDelFields('vertex', 'person', ['vector'])"); + UT_EXPECT_TRUE(ret); + server->Kill(); + server->Wait(); + } + { + auto server = StartLGraphServer(conf); + // create graphs + RpcClient client(UT_FMT("{}:{}", conf.bind_host, conf.rpc_port), + _detail::DEFAULT_ADMIN_NAME, _detail::DEFAULT_ADMIN_PASS); + std::string str; + auto ret = client.CallCypher(str, "CALL db.vertexVectorIndexQuery" + "('person','vector',[1,2,3,4], 4, 10) " + "YIELD node RETURN node.id"); + UT_EXPECT_FALSE(ret); server->Kill(); server->Wait(); }