Skip to content

Commit

Permalink
chore: Version bump qdrant to v1.3.0
Browse files Browse the repository at this point in the history
- bump qdrant to v1.3.0 and qdrant-client to v1.3.1
- minor improvements during vector loading
- clean up Dockerfile
  • Loading branch information
sebastianmarkow committed Jul 1, 2023
1 parent 54e78ae commit d8e6947
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 32 deletions.
33 changes: 17 additions & 16 deletions ann_benchmarks/algorithms/qdrant/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
ARG QDRANT_VERSION=1.1.1
ARG QDRANT_CLIENT_VERSION=1.1.6
ARG QDRANT_VERSION=1.3.0
ARG QDRANT_CLIENT_VERSION=1.3.1
FROM qdrant/qdrant:v${QDRANT_VERSION}

RUN apt-get update
RUN apt-get install -y python3-pip build-essential python3-h5py
RUN python3 -m pip install --upgrade pip setuptools wheel

ARG QDRANT_CLIENT_VERSION
WORKDIR /home/app
COPY requirements.txt .

RUN apt-get update \
&& apt-get install -y python3-pip build-essential python3-h5py \
&& python3 -m pip install --break-system-packages --upgrade pip setuptools wheel \
&& python3 -m pip install --break-system-packages -r requirements.txt \
&& python3 -m pip install --break-system-packages qdrant-client==${QDRANT_CLIENT_VERSION}

COPY requirements.txt run_algorithm.py ./
COPY run_algorithm.py .

ARG QDRANT_VERSION
RUN python3 -m pip install -r requirements.txt
RUN python3 -m pip install qdrant-client==${QDRANT_VERSION}
RUN printf '#!/bin/bash\n\
cd /qdrant && ./qdrant &\n\
sleep 5\n\
python3 -u run_algorithm.py "$@"' > entrypoint.sh \
&& chmod u+x entrypoint.sh

RUN echo '#!/bin/bash' >> entrypoint.sh
RUN echo 'cd /qdrant && ./qdrant &' >> entrypoint.sh
RUN echo 'sleep 5' >> entrypoint.sh
RUN echo 'python3 -u run_algorithm.py "$@"' >> entrypoint.sh
RUN chmod u+x entrypoint.sh
ENTRYPOINT ["/home/app/entrypoint.sh"]
ENTRYPOINT ["/home/app/entrypoint.sh"]
58 changes: 42 additions & 16 deletions ann_benchmarks/algorithms/qdrant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client import grpc
from qdrant_client.http.models import (CollectionStatus, Distance,
VectorParams, OptimizersConfigDiff, ScalarQuantization,
ScalarQuantizationConfig, ScalarType, HnswConfigDiff)
from qdrant_client.http.models import (
CollectionStatus,
Distance,
VectorParams,
OptimizersConfigDiff,
ScalarQuantization,
ScalarQuantizationConfig,
ScalarType,
HnswConfigDiff,
)

from ..base.module import BaseANN

Expand Down Expand Up @@ -50,13 +57,18 @@ def fit(self, X):
)
)

# Disabling indexing during bulk upload
# https://qdrant.tech/documentation/tutorials/bulk-upload/#disable-indexing-during-upload
# Uploading to multiple shards
# https://qdrant.tech/documentation/tutorials/bulk-upload/#parallel-upload-into-multiple-shards
self._client.recreate_collection(
collection_name=self._collection_name,
shard_number=2,
vectors_config=VectorParams(size=X.shape[1], distance=self._distances_mapping[self._metric]),
optimizers_config=OptimizersConfigDiff(
default_segment_number=2,
max_segment_size=100000000,
indexing_threshold=1000,
memmap_threshold=20000,
indexing_threshold=0,
),
quantization_config=quantization_config,
# TODO: benchmark this as well
Expand All @@ -68,7 +80,20 @@ def fit(self, X):
)

self._client.upload_collection(
collection_name=self._collection_name, vectors=X, ids=list(range(X.shape[0])), parallel=1
collection_name=self._collection_name,
vectors=X,
ids=list(range(X.shape[0])),
batch_size=BATCH_SIZE,
parallel=1,
)

# Re-enabling indexing
self._client.update_collection(
collection_name=self._collection_name,
optimizers_config=OptimizersConfigDiff(
indexing_threshold=20000,
),
timeout=TIMEOUT,
)

# wait for vectors to be fully indexed
Expand All @@ -92,20 +117,19 @@ def set_query_arguments(self, hnsw_ef, rescore):
self._search_params["rescore"] = rescore

def query(self, q, n):
quantization_search_params = grpc.QuantizationSearchParams(
ignore=False,
rescore=self._search_params["rescore"],
)

search_request = grpc.SearchPoints(
collection_name=self._collection_name,
vector=q.tolist(),
limit=n,
with_payload=grpc.WithPayloadSelector(enable=False),
with_vectors=grpc.WithVectorsSelector(enable=False),
params=grpc.SearchParams(
hnsw_ef=self._search_params["hnsw_ef"],
quantization=quantization_search_params,
)
quantization=grpc.QuantizationSearchParams(
ignore=False,
rescore=self._search_params["rescore"],
),
),
)

search_result = self._client.grpc_points.Search(search_request, timeout=TIMEOUT)
Expand Down Expand Up @@ -135,11 +159,13 @@ def iter_batches(iterable, batch_size) -> Iterable[List[Any]]:
vector=q.tolist(),
limit=n,
with_payload=grpc.WithPayloadSelector(enable=False),
with_vectors=grpc.WithVectorsSelector(enable=False),
params=grpc.SearchParams(
hnsw_ef=self._search_params["hnsw_ef"],
quantization=quantization_search_params,
)
) for q in X
),
)
for q in X
]

self.batch_results = []
Expand All @@ -162,4 +188,4 @@ def get_batch_results(self):

def __str__(self):
hnsw_ef = self._search_params["hnsw_ef"]
return f"Qdrant(quantization={self._quantization}, hnsw_ef={hnsw_ef})"
return f"Qdrant(quantization={self._quantization}, hnsw_ef={hnsw_ef})"

0 comments on commit d8e6947

Please sign in to comment.