Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IVF-Flat reconstruction #1270

Open
wants to merge 48 commits into
base: branch-23.08
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
328a179
Initial index splitting
lowener Feb 7, 2023
a83aca4
Adapt `extend`
lowener Feb 7, 2023
843904a
Refactoring: build and extend fix
lowener Feb 9, 2023
f09b3a0
Refactor ivf flat search for index splitting
lowener Feb 10, 2023
fdc9395
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Feb 10, 2023
93d5b35
Use mdpsan/mdarray aliases
lowener Feb 12, 2023
5fcf564
Add serialization
lowener Feb 13, 2023
c49bf67
Deserialize ivf_flat and style fix
lowener Feb 16, 2023
6ba87d4
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Feb 19, 2023
7e2d80b
Integrate ivf::list to ivf_flat index splitting
lowener Feb 21, 2023
74e0a8c
Update refine
lowener Feb 21, 2023
7b36742
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Feb 22, 2023
135a9b6
Use std vector for ivf flat index list
lowener Feb 24, 2023
81b2cbf
Test second variant of `ivf_flat::extend`
lowener Feb 24, 2023
fd33dbc
Use ValueT template on spec
lowener Feb 25, 2023
539fbc5
Use second variant of ivf_extend
lowener Feb 26, 2023
31815d7
Merge branch 'branch-23.04' into 23.04-flat-split
cjnolet Feb 27, 2023
33bfb82
Fix spec template
lowener Feb 28, 2023
adb96e4
Revert ValueT on ivfpq
lowener Mar 6, 2023
32936c9
Make ivf::list more flexible
achirkin Mar 7, 2023
23e0f84
Merge pull request #1 from achirkin/23.04-flat-split
lowener Mar 7, 2023
acf1888
Use new list API for ivf flat
lowener Mar 7, 2023
0283d25
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 7, 2023
f790abf
Fix adaptive centers
lowener Mar 9, 2023
6a05196
Fix adaptive center norms testing
lowener Mar 9, 2023
eda7923
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 9, 2023
f0a7031
Fix list spec template after merge
lowener Mar 9, 2023
a6c54a4
Add serialization overload
lowener Mar 9, 2023
85b4aa1
Fix list size override
lowener Mar 10, 2023
692af0d
Fix resizelist with interleaved format
lowener Mar 10, 2023
d892ebb
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 10, 2023
a8b96a7
Fix refine list resize operation
lowener Mar 12, 2023
1870541
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 12, 2023
beb9264
IVF-Flat reconstruction
viclafargue Mar 14, 2023
2e8bc92
Merge branch 'branch-23.04' into ivf-reconstruction
viclafargue Mar 15, 2023
ebda975
Merge branch 'branch-23.04' into ivf-reconstruction
cjnolet Mar 23, 2023
81d51be
Merge branch 'branch-23.04' into ivf-reconstruction
cjnolet Mar 25, 2023
d250ae9
Merge branch 'branch-23.06' into ivf-reconstruction
cjnolet Apr 12, 2023
e9df346
Merge branch 'branch-23.06' into ivf-reconstruction
viclafargue May 22, 2023
f798657
addressing review + expose function
viclafargue May 23, 2023
080c784
Cluster reconstruction
viclafargue May 29, 2023
0f6cdbe
addressing review 1/2
viclafargue Jul 12, 2023
cad88b3
addressing review 2/2
viclafargue Jul 12, 2023
6cd1abc
Fix style
viclafargue Jul 12, 2023
de6c13d
addressing review
viclafargue Jul 14, 2023
7064703
Merge branch 'branch-23.08' into ivf-reconstruction
viclafargue Jul 14, 2023
5e02d7f
Fix style
viclafargue Jul 14, 2023
87edda0
Fix doc
viclafargue Jul 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,9 @@ if(RAFT_COMPILE_LIBRARY)
src/neighbors/ivf_flat_search_float_int64_t.cu
src/neighbors/ivf_flat_search_int8_t_int64_t.cu
src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
src/neighbors/ivf_flat_reconstruct_float_int64_t.cu
src/neighbors/ivf_flat_reconstruct_int8_t_int64_t.cu
src/neighbors/ivf_flat_reconstruct_uint8_t_int64_t.cu
src/neighbors/ivfpq_build_float_int64_t.cu
src/neighbors/ivfpq_build_int8_t_int64_t.cu
src/neighbors/ivfpq_build_uint8_t_int64_t.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/faiss/faiss_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ std::unique_ptr<raft::bench::ann::ANN<T>> create_algo(const std::string& algo,
// stop compiler warning; not all algorithms support multi-GPU so it may not be used
(void)dev_list;

raft::bench::ann::Metric metric = parse_metric(distance);
std::unique_ptr<raft::bench::ann::ANN<T>> ann;

if constexpr (std::is_same_v<T, float>) {
raft::bench::ann::Metric metric = parse_metric(distance);
if (algo == "faiss_gpu_ivf_flat") {
ann = make_algo<T, raft::bench::ann::FaissGpuIVFFlat>(metric, dim, conf, dev_list);
} else if (algo == "faiss_gpu_ivf_pq") {
Expand Down
151 changes: 151 additions & 0 deletions cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
#pragma once

#include <raft/cluster/kmeans_balanced.cuh>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/mdarray.hpp>
#include <raft/core/nvtx.hpp>
#include <raft/core/operators.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resource/thrust_policy.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/add.cuh>
#include <raft/linalg/map.cuh>
Expand All @@ -35,6 +37,8 @@

#include <rmm/cuda_stream_view.hpp>

#include <thrust/extrema.h>

#include <cstdint>

namespace raft::neighbors::ivf_flat::detail {
Expand Down Expand Up @@ -416,4 +420,151 @@ inline void fill_refinement_index(raft::resources const& handle,
refinement_index->veclen());
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

template <typename T, typename IdxT>
__global__ void get_data_ptr_kernel(const uint32_t* list_sizes,
const T* const* list_data_ptrs,
const IdxT* const* list_indices_ptrs,
uint32_t dim,
uint32_t veclen,
uint32_t n_list,
IdxT max_indice,
T** ptrs_to_data)
{
const IdxT list_id = IdxT(blockDim.x) * IdxT(blockIdx.x) + threadIdx.x;
if (list_id >= n_list) { return; }
const IdxT inlist_id = IdxT(blockDim.y) * IdxT(blockIdx.y) + threadIdx.y;
const uint32_t list_size = list_sizes[list_id];
if (inlist_id >= list_size) { return; }

auto* list_indices = list_indices_ptrs[list_id];
IdxT id = list_indices[inlist_id];
if (id > max_indice) { return; }

using interleaved_group = Pow2<kIndexGroupSize>;
auto group_offset = interleaved_group::roundDown(inlist_id);
auto ingroup_id = interleaved_group::mod(inlist_id) * veclen;

auto* list_data = list_data_ptrs[list_id];
const T* ptr = list_data + (group_offset * dim) + ingroup_id;
ptrs_to_data[id] = (T*)ptr;
}

template <typename T, typename IdxT>
__global__ void reconstruct_batch_kernel(const IdxT* vector_ids,
const T** ptrs_to_data,
uint32_t dim,
uint32_t veclen,
IdxT n_rows,
T* reconstr)
{
const IdxT i = IdxT(blockDim.x) * IdxT(blockIdx.x) + threadIdx.x;
if (i >= n_rows) { return; }

const IdxT vector_id = vector_ids[i];
const T* src = ptrs_to_data[vector_id];
if (!src) { return; }

reconstr += i * dim;
for (uint32_t l = 0; l < dim; l += veclen) {
for (uint32_t j = 0; j < veclen; j++) {
reconstr[l + j] = src[l * kIndexGroupSize + j];
}
}
}

template <typename T, typename IdxT>
void reconstruct_batch(raft::resources const& handle,
const index<T, IdxT>& index,
raft::device_vector_view<const IdxT, IdxT> vector_ids,
raft::device_matrix_view<T, IdxT, row_major> vector_out)
{
auto stream = raft::resource::get_cuda_stream(handle);
auto exec_policy = raft::resource::get_thrust_policy(handle);

thrust::device_ptr<const IdxT> vector_ids_ptr =
thrust::device_pointer_cast(vector_ids.data_handle());
IdxT max_indice =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the consensus was to opt for a "inverted-index" module (hashmap/array/whatever) shared among ivf methods to convert user indices to (label, in-cluster-offset) pairs?
This code could break if the user adds non-contiguous range of large indices to the DB.

Copy link
Contributor Author

@viclafargue viclafargue Jul 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review! Yes absolutely, this is a temporary solution that would be largely improved by the use of a hashmap in a follow-up PR. But, I thought that letting this version as it is would set the API and allow people to run a reconstruction if their use case allows it (smaller index). But, can still remove it if that's the better path forward. What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was actually thinking of decoupling the ivf-list hashmap struct api from the ivf-pq and ivf-flat methods. Hence a user would need to construct the hashmap explicitly once (costly operation) and then either:
a) search-by-user-ids in two calls (e.g. hashmap::get_lists_offsets + index::get_vectors)
b) pass the hashmap as an argument to the ivf index methods (index::get_vectors)

Not sure what we will decide in the end, but point is the api may change as we progress with the hashmap. Then, maybe we just can keep this function in the detail namespace for now? Hence the work won't be lost and if someone needs the functionality urgently, they can use the detail function, and we don't need to break the public api in the follow-up PR.

*thrust::max_element(exec_policy, vector_ids_ptr, vector_ids_ptr + vector_ids.extent(0));

rmm::device_uvector<T*> ptrs_to_data(max_indice + 1, stream);
utils::memzero(ptrs_to_data.data(), ptrs_to_data.size(), stream);

thrust::device_ptr<const uint32_t> list_sizes_ptr =
thrust::device_pointer_cast(index.list_sizes().data_handle());
uint32_t max_list_size = *thrust::max_element(
exec_policy, list_sizes_ptr, list_sizes_ptr + index.list_sizes().extent(0));

const dim3 block_dim1(16, 16);
const dim3 grid_dim1(raft::ceildiv<size_t>(index.n_lists(), block_dim1.x),
raft::ceildiv<size_t>(max_list_size, block_dim1.y));
get_data_ptr_kernel<<<grid_dim1, block_dim1, 0, stream>>>(index.list_sizes().data_handle(),
index.data_ptrs().data_handle(),
index.inds_ptrs().data_handle(),
index.dim(),
index.veclen(),
index.n_lists(),
max_indice,
ptrs_to_data.data());
RAFT_CUDA_TRY(cudaPeekAtLastError());

auto n_reconstruction = vector_ids.extent(0);
const dim3 block_dim2(256);
const dim3 grid_dim2(raft::ceildiv<size_t>(n_reconstruction, block_dim2.x));
reconstruct_batch_kernel<<<grid_dim2, block_dim2, 0, stream>>>(vector_ids.data_handle(),
(const T**)ptrs_to_data.data(),
index.dim(),
index.veclen(),
n_reconstruction,
vector_out.data_handle());
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

template <typename T, typename IdxT>
__global__ void reconstruct_list_data_kernel(T* out_vectors,
T* in_list_data,
std::variant<IdxT, const IdxT*> offset_or_indices,
IdxT len,
size_t veclen,
IdxT dim)
{
for (IdxT ix = threadIdx.x + blockDim.x * blockIdx.x; ix < len; ix += blockDim.x) {
const IdxT src_ix = std::holds_alternative<IdxT>(offset_or_indices)
? std::get<IdxT>(offset_or_indices) + ix
: std::get<const IdxT*>(offset_or_indices)[ix];

using group_align = Pow2<kIndexGroupSize>;
const IdxT group_ix = group_align::div(src_ix);
const IdxT ingroup_ix = group_align::mod(src_ix) * veclen;

for (IdxT l = 0; l < dim; l += veclen) {
for (IdxT j = 0; j < veclen; j++) {
out_vectors[ix * dim + l + j] = in_list_data[l * kIndexGroupSize + ingroup_ix + j];
}
}
}
}

/** Decode the list data; see the public interface for the api and usage. */
template <typename T, typename IdxT>
void reconstruct_list_data(raft::resources const& handle,
const index<T, IdxT>& index,
device_matrix_view<T, uint32_t, row_major> out_vectors,
uint32_t label,
uint32_t offset)
{
auto stream = raft::resource::get_cuda_stream(handle);

IdxT len = out_vectors.extent(0);
const dim3 block_dim(256);
const dim3 grid_dim(raft::div_rounding_up_safe<size_t>(len, block_dim.x));
reconstruct_list_data_kernel<T, IdxT>
<<<grid_dim, block_dim, 0, stream>>>((T*)out_vectors.data_handle(),
(T*)index.lists()[label]->data.data_handle(),
(IdxT)offset,
(IdxT)len,
(size_t)index.veclen(),
(IdxT)index.dim());
}

} // namespace raft::neighbors::ivf_flat::detail
21 changes: 21 additions & 0 deletions cpp/include/raft/neighbors/ivf_flat-ext.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ void search(raft::resources const& handle,
raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
raft::device_matrix_view<float, IdxT, row_major> distances) RAFT_EXPLICIT;

template <typename T, typename IdxT>
void reconstruct_list_data(raft::resources const& handle,
const index<T, IdxT>& index,
device_matrix_view<T, uint32_t, row_major> out_vectors,
uint32_t label,
uint32_t offset) RAFT_EXPLICIT;

} // namespace raft::neighbors::ivf_flat

#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY
Expand Down Expand Up @@ -204,3 +211,17 @@ instantiate_raft_neighbors_ivf_flat_search(int8_t, int64_t);
instantiate_raft_neighbors_ivf_flat_search(uint8_t, int64_t);

#undef instantiate_raft_neighbors_ivf_flat_search

#define instantiate_raft_neighbors_ivf_flat_reconstruct(T, IdxT) \
extern template void raft::neighbors::ivf_flat::reconstruct_list_data<T, IdxT>( \
raft::resources const& handle, \
const raft::neighbors::ivf_flat::index<T, IdxT>& index, \
raft::device_matrix_view<T, uint32_t, row_major> out_vectors, \
uint32_t label, \
uint32_t offset);

instantiate_raft_neighbors_ivf_flat_reconstruct(float, int64_t);
instantiate_raft_neighbors_ivf_flat_reconstruct(int8_t, int64_t);
instantiate_raft_neighbors_ivf_flat_reconstruct(uint8_t, int64_t);

#undef instantiate_raft_neighbors_ivf_flat_reconstruct
24 changes: 24 additions & 0 deletions cpp/include/raft/neighbors/ivf_flat-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,30 @@ void search(raft::resources const& handle,
raft::neighbors::filtering::none_ivf_sample_filter());
}

/**
* @brief Reconstruct vectors of a given cluster
*
*
* @tparam T data element type
* @tparam IdxT type of the indices
*
* @param[in] handle
* @param[in] index ivf-flat constructed index
* @param[out] out_vectors matrix with the vectors contained in the cluster
* @param[in] label cluster index
* @param[in] offset offset
*/
template <typename T, typename IdxT>
void reconstruct_list_data(raft::resources const& handle,
const index<T, IdxT>& index,
device_matrix_view<T, uint32_t, row_major> out_vectors,
uint32_t label,
uint32_t offset)
{
return raft::neighbors::ivf_flat::detail::reconstruct_list_data(
handle, index, out_vectors, label, offset);
}

/** @} */

} // namespace raft::neighbors::ivf_flat
6 changes: 6 additions & 0 deletions cpp/include/raft_runtime/neighbors/ivf_flat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ namespace raft::runtime::neighbors::ivf_flat {
std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
raft::neighbors::ivf_flat::index<T, IdxT>* idx); \
\
void reconstruct_list_data(raft::resources const& handle, \
const raft::neighbors::ivf_flat::index<T, IdxT>& idx, \
device_matrix_view<T, uint32_t, row_major> out_vectors, \
uint32_t label, \
uint32_t offset); \
\
void serialize_file(raft::resources const& handle, \
const std::string& filename, \
const raft::neighbors::ivf_flat::index<T, IdxT>& index); \
Expand Down
38 changes: 38 additions & 0 deletions cpp/src/neighbors/ivf_flat_reconstruct_float_int64_t.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* NOTE: this file is generated by ivf_flat_00_generate.py
*
* Make changes there and run in this directory:
*
* > python ivf_flat_00_generate.py
*
*/

#include <raft/neighbors/ivf_flat-inl.cuh>

#define instantiate_raft_neighbors_ivf_flat_reconstruct(T, IdxT) \
template void raft::neighbors::ivf_flat::reconstruct_list_data<T, IdxT>( \
raft::resources const& handle, \
const raft::neighbors::ivf_flat::index<T, IdxT>& idx, \
raft::device_matrix_view<T, uint32_t, row_major> out_vectors, \
uint32_t label, \
uint32_t offset);

instantiate_raft_neighbors_ivf_flat_reconstruct(float, int64_t);

#undef instantiate_raft_neighbors_ivf_flat_reconstruct
38 changes: 38 additions & 0 deletions cpp/src/neighbors/ivf_flat_reconstruct_int8_t_int64_t.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* NOTE: this file is generated by ivf_flat_00_generate.py
*
* Make changes there and run in this directory:
*
* > python ivf_flat_00_generate.py
*
*/

#include <raft/neighbors/ivf_flat-inl.cuh>

#define instantiate_raft_neighbors_ivf_flat_reconstruct(T, IdxT) \
template void raft::neighbors::ivf_flat::reconstruct_list_data<T, IdxT>( \
raft::resources const& handle, \
const raft::neighbors::ivf_flat::index<T, IdxT>& idx, \
raft::device_matrix_view<T, uint32_t, row_major> out_vectors, \
uint32_t label, \
uint32_t offset);

instantiate_raft_neighbors_ivf_flat_reconstruct(int8_t, int64_t);

#undef instantiate_raft_neighbors_ivf_flat_reconstruct
38 changes: 38 additions & 0 deletions cpp/src/neighbors/ivf_flat_reconstruct_uint8_t_int64_t.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* NOTE: this file is generated by ivf_flat_00_generate.py
*
* Make changes there and run in this directory:
*
* > python ivf_flat_00_generate.py
*
*/

#include <raft/neighbors/ivf_flat-inl.cuh>

#define instantiate_raft_neighbors_ivf_flat_reconstruct(T, IdxT) \
template void raft::neighbors::ivf_flat::reconstruct_list_data<T, IdxT>( \
raft::resources const& handle, \
const raft::neighbors::ivf_flat::index<T, IdxT>& idx, \
raft::device_matrix_view<T, uint32_t, row_major> out_vectors, \
uint32_t label, \
uint32_t offset);

instantiate_raft_neighbors_ivf_flat_reconstruct(uint8_t, int64_t);

#undef instantiate_raft_neighbors_ivf_flat_reconstruct
Loading