From e1782e2cb55454ced523eb04b95e6f8ad63612e5 Mon Sep 17 00:00:00 2001 From: Lu Qiu Date: Wed, 4 Sep 2024 17:49:02 -0700 Subject: [PATCH 1/5] Add scalar and inverted index --- java/core/lance-jni/src/utils.rs | 104 ++++++++++++- .../main/java/com/lancedb/lance/Dataset.java | 139 +++++++++-------- .../com/lancedb/lance/index/IndexParams.java | 61 ++++++-- .../com/lancedb/lance/index/IndexType.java | 32 ++-- .../test/java/com/lancedb/lance/JNITest.java | 147 ++++++++---------- 5 files changed, 295 insertions(+), 188 deletions(-) diff --git a/java/core/lance-jni/src/utils.rs b/java/core/lance-jni/src/utils.rs index 70f72b76e1..b25d9670d3 100644 --- a/java/core/lance-jni/src/utils.rs +++ b/java/core/lance-jni/src/utils.rs @@ -19,6 +19,7 @@ use jni::objects::{JObject, JString}; use jni::JNIEnv; use lance::dataset::{WriteMode, WriteParams}; use lance::index::vector::{StageParams, VectorIndexParams}; +use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams, ScalarIndexType}; use lance_index::vector::hnsw::builder::HnswBuildParams; use lance_index::vector::ivf::IvfBuildParams; use lance_index::vector::pq::PQBuildParams; @@ -114,7 +115,7 @@ pub fn get_index_params( let vector_index_params_option_object = env .call_method( - index_params_obj, + &index_params_obj, "getVectorIndexParams", "()Ljava/util/Optional;", &[], @@ -246,10 +247,101 @@ pub fn get_index_params( None }; - match vector_index_params_option { - Some(params) => Ok(Box::new(params) as Box), - None => Err(Error::input_error( - "VectorIndexParams not present".to_string(), - )), + if vector_index_params_option.is_some() { + return Ok(Box::new(vector_index_params_option.unwrap()) as Box); + } + + let scalar_index_params_option_object = env + .call_method( + &index_params_obj, + "getScalarIndexParams", + "()Ljava/util/Optional;", + &[], + )? + .l()?; + + let scalar_index_params_option = if env + .call_method(&scalar_index_params_option_object, "isPresent", "()Z", &[])? + .z()? + { + let scalar_index_params_obj = env + .call_method( + &scalar_index_params_option_object, + "get", + "()Ljava/lang/Object;", + &[], + )? + .l()?; + + let force_index_type: Option = env.get_optional_from_method( + &scalar_index_params_obj, + "getForceIndexType", + |env, force_index_type_obj| { + let enum_name = env + .call_method(&force_index_type_obj, "name", "()Ljava/lang/String;", &[])? + .l()?; + let enum_str: String = env.get_string(&JString::from(enum_name))?.into(); + + match enum_str.as_str() { + "BTREE" => Ok(ScalarIndexType::BTree), + "BITMAP" => Ok(ScalarIndexType::Bitmap), + "LABEL_LIST" => Ok(ScalarIndexType::LabelList), + "INVERTED" => Ok(ScalarIndexType::Inverted), + _ => Err(Error::input_error(format!( + "Unknown ScalarIndexType: {}", + enum_str + ))), + } + }, + )?; + Some(ScalarIndexParams { force_index_type }) + } else { + None + }; + + if scalar_index_params_option.is_some() { + return Ok(Box::new(scalar_index_params_option.unwrap()) as Box); } + + let inverted_index_params_option_object = env + .call_method( + &index_params_obj, + "getInvertedIndexParams", + "()Ljava/util/Optional;", + &[], + )? + .l()?; + + let inverted_index_params_option = if env + .call_method( + &inverted_index_params_option_object, + "isPresent", + "()Z", + &[], + )? + .z()? + { + let inverted_index_params_obj = env + .call_method( + &inverted_index_params_option_object, + "get", + "()Ljava/lang/Object;", + &[], + )? + .l()?; + + let with_position = + env.get_boolean_from_method(&inverted_index_params_obj, "isWithPosition")?; + Some(InvertedIndexParams { with_position }) + } else { + None + }; + + if inverted_index_params_option.is_some() { + return Ok(Box::new(inverted_index_params_option.unwrap()) as Box); + } + + Err(Error::input_error( + "No valid index params presented".to_string(), + ))? } diff --git a/java/core/src/main/java/com/lancedb/lance/Dataset.java b/java/core/src/main/java/com/lancedb/lance/Dataset.java index a7a5d13efe..9f93da8610 100644 --- a/java/core/src/main/java/com/lancedb/lance/Dataset.java +++ b/java/core/src/main/java/com/lancedb/lance/Dataset.java @@ -1,21 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package com.lancedb.lance; import com.lancedb.lance.index.IndexParams; import com.lancedb.lance.index.IndexType; +import com.lancedb.lance.index.ScalarIndexParams; import com.lancedb.lance.ipc.LanceScanner; import com.lancedb.lance.ipc.ScanOptions; import java.io.Closeable; @@ -31,12 +30,9 @@ import org.apache.arrow.vector.types.pojo.Schema; /** - * Class representing a Lance dataset, interfacing with the native lance - * library. This class - * provides functionality to open and manage datasets with native code. The - * native library is loaded - * statically and utilized through native methods. It implements the - * {@link java.io.Closeable} + * Class representing a Lance dataset, interfacing with the native lance library. This class + * provides functionality to open and manage datasets with native code. The native library is loaded + * statically and utilized through native methods. It implements the {@link java.io.Closeable} * interface to ensure proper resource management. */ public class Dataset implements Closeable { @@ -51,16 +47,15 @@ public class Dataset implements Closeable { private final LockManager lockManager = new LockManager(); - private Dataset() { - } + private Dataset() {} /** * Creates an empty dataset. * * @param allocator the buffer allocator - * @param path dataset uri - * @param schema dataset schema - * @param params write params + * @param path dataset uri + * @param schema dataset schema + * @param params write params * @return Dataset */ public static Dataset create(BufferAllocator allocator, String path, Schema schema, @@ -71,9 +66,9 @@ public static Dataset create(BufferAllocator allocator, String path, Schema sche Preconditions.checkNotNull(params); try (ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator)) { Data.exportSchema(allocator, schema, null, arrowSchema); - Dataset dataset = createWithFfiSchema(arrowSchema.memoryAddress(), - path, params.getMaxRowsPerFile(), params.getMaxRowsPerGroup(), - params.getMaxBytesPerFile(), params.getMode()); + Dataset dataset = + createWithFfiSchema(arrowSchema.memoryAddress(), path, params.getMaxRowsPerFile(), + params.getMaxRowsPerGroup(), params.getMaxBytesPerFile(), params.getMode()); dataset.allocator = allocator; return dataset; } @@ -83,20 +78,19 @@ public static Dataset create(BufferAllocator allocator, String path, Schema sche * Create a dataset with given stream. * * @param allocator buffer allocator - * @param stream arrow stream - * @param path dataset uri - * @param params write parameters + * @param stream arrow stream + * @param path dataset uri + * @param params write parameters * @return Dataset */ - public static Dataset create(BufferAllocator allocator, ArrowArrayStream stream, - String path, WriteParams params) { + public static Dataset create(BufferAllocator allocator, ArrowArrayStream stream, String path, + WriteParams params) { Preconditions.checkNotNull(allocator); Preconditions.checkNotNull(stream); Preconditions.checkNotNull(path); Preconditions.checkNotNull(params); - Dataset dataset = createWithFfiStream(stream.memoryAddress(), path, - params.getMaxRowsPerFile(), params.getMaxRowsPerGroup(), - params.getMaxBytesPerFile(), params.getMode()); + Dataset dataset = createWithFfiStream(stream.memoryAddress(), path, params.getMaxRowsPerFile(), + params.getMaxRowsPerGroup(), params.getMaxBytesPerFile(), params.getMode()); dataset.allocator = allocator; return dataset; } @@ -122,7 +116,7 @@ public static Dataset open(String path) { /** * Open a dataset from the specified path. * - * @param path file path + * @param path file path * @param options the open options * @return Dataset */ @@ -133,7 +127,7 @@ public static Dataset open(String path, ReadOptions options) { /** * Open a dataset from the specified path. * - * @param path file path + * @param path file path * @param allocator Arrow buffer allocator * @return Dataset */ @@ -145,8 +139,8 @@ public static Dataset open(String path, BufferAllocator allocator) { * Open a dataset from the specified path with additional options. * * @param allocator Arrow buffer allocator - * @param path file path - * @param options the open options + * @param path file path + * @param options the open options * @return Dataset */ public static Dataset open(BufferAllocator allocator, String path, ReadOptions options) { @@ -156,18 +150,17 @@ public static Dataset open(BufferAllocator allocator, String path, ReadOptions o /** * Open a dataset from the specified path with additional options. * - * @param path file path + * @param path file path * @param options the open options * @return Dataset */ - private static Dataset open(BufferAllocator allocator, boolean selfManagedAllocator, - String path, ReadOptions options) { + private static Dataset open(BufferAllocator allocator, boolean selfManagedAllocator, String path, + ReadOptions options) { Preconditions.checkNotNull(path); Preconditions.checkNotNull(allocator); Preconditions.checkNotNull(options); - Dataset dataset = openNative(path, options.getVersion(), - options.getBlockSize(), options.getIndexCacheSize(), - options.getMetadataCacheSize()); + Dataset dataset = openNative(path, options.getVersion(), options.getBlockSize(), + options.getIndexCacheSize(), options.getMetadataCacheSize()); dataset.allocator = allocator; dataset.selfManagedAllocator = selfManagedAllocator; return dataset; @@ -179,16 +172,15 @@ private static native Dataset openNative(String path, Optional version, /** * Create a new version of dataset. * - * @param allocator the buffer allocator - * @param path The file path of the dataset to open. - * @param operation The operation to apply to the dataset. - * @param readVersion The version of the dataset that was used as the base for - * the changes. - * This is not needed for overwrite or restore operations. + * @param allocator the buffer allocator + * @param path The file path of the dataset to open. + * @param operation The operation to apply to the dataset. + * @param readVersion The version of the dataset that was used as the base for the changes. This + * is not needed for overwrite or restore operations. * @return A new instance of {@link Dataset} linked to the opened dataset. */ - public static Dataset commit(BufferAllocator allocator, String path, - FragmentOperation operation, Optional readVersion) { + public static Dataset commit(BufferAllocator allocator, String path, FragmentOperation operation, + Optional readVersion) { Preconditions.checkNotNull(allocator); Preconditions.checkNotNull(path); Preconditions.checkNotNull(operation); @@ -213,8 +205,7 @@ public LanceScanner newScan() { /** * Create a new Dataset Scanner. * - * @param batchSize the scan options with batch size, columns filter, and - * substrait + * @param batchSize the scan options with batch size, columns filter, and substrait * @return a dataset scanner */ public LanceScanner newScan(long batchSize) { @@ -260,23 +251,51 @@ public long latestVersion() { private native long nativeLatestVersion(); /** - * Creates a new index on the dataset. - * Only vector indexes are supported. + * Creates a new index on the dataset. Only vector indexes are supported. * - * @param columns the columns to index from + * @param columns the columns to index from * @param indexType the index type - * @param name the name of the created index - * @param params index params - * @param replace whether to replace the existing index + * @param name the name of the created index + * @param params index params + * @param replace whether to replace the existing index */ public void createIndex(List columns, IndexType indexType, Optional name, IndexParams params, boolean replace) { try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + params = adjustParamsForIndexType(indexType, params); nativeCreateIndex(columns, indexType.getValue(), name, params, replace); } } + /** + * Adjusts the IndexParams based on the given IndexType. + * + * @param indexType The type of index to create. + * @param params The original index parameters. + * @return Adjusted IndexParams suitable for the given IndexType. + */ + private IndexParams adjustParamsForIndexType(IndexType indexType, IndexParams params) { + ScalarIndexParams.ScalarIndexType scalarIndexType; + + switch (indexType) { + case BITMAP: + scalarIndexType = ScalarIndexParams.ScalarIndexType.BITMAP; + break; + case BTREE: + scalarIndexType = ScalarIndexParams.ScalarIndexType.BTREE; + break; + case LABEL_LIST: + scalarIndexType = ScalarIndexParams.ScalarIndexType.LABEL_LIST; + break; + default: + return params; + } + + return new IndexParams.Builder().setScalarIndexParams( + new ScalarIndexParams.Builder().setForceIndexType(scalarIndexType).build()).build(); + } + private native void nativeCreateIndex(List columns, int indexTypeCode, Optional name, IndexParams params, boolean replace); @@ -344,8 +363,7 @@ public List listIndexes() { private native List nativeListIndexes(); /** - * Closes this dataset and releases any system resources associated with it. If - * the dataset is + * Closes this dataset and releases any system resources associated with it. If the dataset is * already closed, then invoking this method has no effect. */ @Override @@ -362,8 +380,7 @@ public void close() { } /** - * Native method to release the Lance dataset resources associated with the - * given handle. + * Native method to release the Lance dataset resources associated with the given handle. * * @param handle The native handle to the dataset resource. */ diff --git a/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java b/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java index 93ff3f18a9..0a85fcb74d 100644 --- a/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java +++ b/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java @@ -1,15 +1,13 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package com.lancedb.lance.index; @@ -24,21 +22,26 @@ public class IndexParams { private final DistanceType distanceType; private final Optional vectorIndexParams; + private final Optional scalarIndexParams; + private final Optional invertedIndexParams; private IndexParams(Builder builder) { this.distanceType = builder.distanceType; this.vectorIndexParams = builder.vectorIndexParams; + this.scalarIndexParams = builder.scalarIndexParams; + this.invertedIndexParams = builder.invertedIndexParams; } public static class Builder { private DistanceType distanceType = DistanceType.L2; private Optional vectorIndexParams = Optional.empty(); + private Optional scalarIndexParams = Optional.empty(); + private Optional invertedIndexParams = Optional.empty(); public Builder() {} /** - * Set the distance type for calculating the distance between vectors. - * Default to L2. + * Set the distance type for calculating the distance between vectors. Default to L2. */ public Builder setDistanceType(DistanceType distanceType) { this.distanceType = distanceType; @@ -53,6 +56,24 @@ public Builder setVectorIndexParams(VectorIndexParams vectorIndexParams) { return this; } + /** + * @param scalarIndexParams parameters for creating a scalar index. + * @return Builder + */ + public Builder setScalarIndexParams(ScalarIndexParams scalarIndexParams) { + this.scalarIndexParams = Optional.of(scalarIndexParams); + return this; + } + + /** + * @param invertedIndexParams parameters for creating an inverted index. + * @return Builder + */ + public Builder setInvertedIndexParams(InvertedIndexParams invertedIndexParams) { + this.invertedIndexParams = Optional.of(invertedIndexParams); + return this; + } + public IndexParams build() { return new IndexParams(this); } @@ -66,11 +87,19 @@ public Optional getVectorIndexParams() { return vectorIndexParams; } + public Optional getScalarIndexParams() { + return scalarIndexParams; + } + + public Optional getInvertedIndexParams() { + return invertedIndexParams; + } + @Override public String toString() { - return new ToStringBuilder(this) - .append("distanceType", distanceType) - .append("vectorIndexParams", vectorIndexParams.orElse(null)) - .toString(); + return new ToStringBuilder(this).append("distanceType", distanceType) + .append("vectorIndexParams", vectorIndexParams.orElse(null)) + .append("scalarIndexParams", scalarIndexParams.orElse(null)) + .append("invertedIndexParams", invertedIndexParams.orElse(null)).toString(); } -} \ No newline at end of file +} diff --git a/java/core/src/main/java/com/lancedb/lance/index/IndexType.java b/java/core/src/main/java/com/lancedb/lance/index/IndexType.java index d2499e23d2..311bca7dce 100644 --- a/java/core/src/main/java/com/lancedb/lance/index/IndexType.java +++ b/java/core/src/main/java/com/lancedb/lance/index/IndexType.java @@ -1,40 +1,28 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package com.lancedb.lance.index; public enum IndexType { - SCALAR(0), - BTREE(1), - BITMAP(2), - LABEL_LIST(3), - INVERTED(4), - VECTOR(100), - IVF_FLAT(101), - IVF_SQ(102), - IVF_PQ(103), - IVF_HNSW_SQ(104), - IVF_HNSW_PQ(105); + SCALAR(0), BTREE(1), BITMAP(2), LABEL_LIST(3), INVERTED(4), VECTOR(100); private final int value; IndexType(int value) { - this.value = value; + this.value = value; } public int getValue() { - return value; + return value; } } diff --git a/java/core/src/test/java/com/lancedb/lance/JNITest.java b/java/core/src/test/java/com/lancedb/lance/JNITest.java index 60b9731a7e..8fd45bacee 100644 --- a/java/core/src/test/java/com/lancedb/lance/JNITest.java +++ b/java/core/src/test/java/com/lancedb/lance/JNITest.java @@ -1,15 +1,13 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package com.lancedb.lance; @@ -24,6 +22,9 @@ import com.lancedb.lance.test.JniTestHelper; import com.lancedb.lance.index.DistanceType; import com.lancedb.lance.index.IndexParams; +import com.lancedb.lance.index.InvertedIndexParams; +import com.lancedb.lance.index.ScalarIndexParams; +import com.lancedb.lance.index.ScalarIndexParams.ScalarIndexType; import com.lancedb.lance.index.vector.HnswBuildParams; import com.lancedb.lance.index.vector.IvfBuildParams; import com.lancedb.lance.index.vector.PQBuildParams; @@ -44,96 +45,61 @@ public void testIntsOpt() { @Test public void testQuery() { - JniTestHelper.parseQuery(Optional.of(new Query.Builder() - .setColumn("column") - .setKey(new float[] { 1.0f, 2.0f, 3.0f }) - .setK(10) - .setNprobes(20) - .setEf(30) - .setRefineFactor(40) - .setDistanceType(DistanceType.L2) - .setUseIndex(true) - .build())); + JniTestHelper.parseQuery(Optional.of(new Query.Builder().setColumn("column") + .setKey(new float[] {1.0f, 2.0f, 3.0f}).setK(10).setNprobes(20).setEf(30) + .setRefineFactor(40).setDistanceType(DistanceType.L2).setUseIndex(true).build())); } @Test public void testIvfFlatIndexParams() { JniTestHelper.parseIndexParams(new IndexParams.Builder() - .setVectorIndexParams( - VectorIndexParams.ivfFlat(10, DistanceType.L2)) - .build()); + .setVectorIndexParams(VectorIndexParams.ivfFlat(10, DistanceType.L2)).build()); } @Test public void testIvfPqIndexParams() { JniTestHelper.parseIndexParams(new IndexParams.Builder() - .setVectorIndexParams( - VectorIndexParams.ivfPq(10, 8, 4, DistanceType.L2, 50)) - .build()); + .setVectorIndexParams(VectorIndexParams.ivfPq(10, 8, 4, DistanceType.L2, 50)).build()); } @Test public void testIvfPqWithCustomParamsIndexParams() { - IvfBuildParams ivf = new IvfBuildParams.Builder() - .setNumPartitions(20) - .setMaxIters(100) - .setSampleRate(512) - .build(); - PQBuildParams pq = new PQBuildParams.Builder() - .setNumSubVectors(8) - .setNumBits(8) - .setMaxIters(100) - .setKmeansRedos(3) - .setSampleRate(1024) - .build(); + IvfBuildParams ivf = new IvfBuildParams.Builder().setNumPartitions(20).setMaxIters(100) + .setSampleRate(512).build(); + PQBuildParams pq = new PQBuildParams.Builder().setNumSubVectors(8).setNumBits(8) + .setMaxIters(100).setKmeansRedos(3).setSampleRate(1024).build(); JniTestHelper.parseIndexParams(new IndexParams.Builder() - .setVectorIndexParams( - VectorIndexParams.withIvfPqParams(DistanceType.Cosine, ivf, pq)) + .setVectorIndexParams(VectorIndexParams.withIvfPqParams(DistanceType.Cosine, ivf, pq)) .build()); } @Test public void testIvfHnswPqIndexParams() { - IvfBuildParams ivf = new IvfBuildParams.Builder() - .setNumPartitions(15) - .build(); - HnswBuildParams hnsw = new HnswBuildParams.Builder() - .setMaxLevel((short) 10) - .setM(30) - .setEfConstruction(200) - .setPrefetchDistance(3) - .build(); - PQBuildParams pq = new PQBuildParams.Builder() - .setNumSubVectors(16) - .setNumBits(8) - .build(); + IvfBuildParams ivf = new IvfBuildParams.Builder().setNumPartitions(15).build(); + HnswBuildParams hnsw = new HnswBuildParams.Builder().setMaxLevel((short) 10).setM(30) + .setEfConstruction(200).setPrefetchDistance(3).build(); + PQBuildParams pq = new PQBuildParams.Builder().setNumSubVectors(16).setNumBits(8).build(); JniTestHelper.parseIndexParams(new IndexParams.Builder() - .setVectorIndexParams( - VectorIndexParams.withIvfHnswPqParams(DistanceType.L2, ivf, hnsw, pq)) + .setVectorIndexParams(VectorIndexParams.withIvfHnswPqParams(DistanceType.L2, ivf, hnsw, pq)) .build()); } @Test public void testIvfHnswSqIndexParams() { - IvfBuildParams ivf = new IvfBuildParams.Builder() - .setNumPartitions(25) - .build(); - HnswBuildParams hnsw = new HnswBuildParams.Builder() - .setMaxLevel((short) 8) - .setM(25) - .setEfConstruction(175) - .build(); - SQBuildParams sq = new SQBuildParams.Builder() - .setNumBits((short) 16) - .setSampleRate(512) - .build(); - - JniTestHelper.parseIndexParams(new IndexParams.Builder() - .setVectorIndexParams( - VectorIndexParams.withIvfHnswSqParams(DistanceType.Dot, ivf, hnsw, sq)) - .build()); + IvfBuildParams ivf = new IvfBuildParams.Builder().setNumPartitions(25).build(); + HnswBuildParams hnsw = new HnswBuildParams.Builder().setMaxLevel((short) 8).setM(25) + .setEfConstruction(175).build(); + SQBuildParams sq = + new SQBuildParams.Builder().setNumBits((short) 16).setSampleRate(512).build(); + + JniTestHelper + .parseIndexParams( + new IndexParams.Builder() + .setVectorIndexParams( + VectorIndexParams.withIvfHnswSqParams(DistanceType.Dot, ivf, hnsw, sq)) + .build()); } @Test @@ -143,11 +109,8 @@ public void testInvalidCombinationPqAndSq() { SQBuildParams sq = new SQBuildParams.Builder().build(); assertThrows(IllegalArgumentException.class, () -> { - new VectorIndexParams.Builder(ivf) - .setDistanceType(DistanceType.L2) - .setPqParams(pq) - .setSqParams(sq) - .build(); + new VectorIndexParams.Builder(ivf).setDistanceType(DistanceType.L2).setPqParams(pq) + .setSqParams(sq).build(); }); } @@ -157,9 +120,7 @@ public void testInvalidCombinationHnswWithoutPqOrSq() { HnswBuildParams hnsw = new HnswBuildParams.Builder().build(); assertThrows(IllegalArgumentException.class, () -> { - new VectorIndexParams.Builder(ivf) - .setDistanceType(DistanceType.L2) - .setHnswParams(hnsw) + new VectorIndexParams.Builder(ivf).setDistanceType(DistanceType.L2).setHnswParams(hnsw) .build(); }); } @@ -170,10 +131,30 @@ public void testInvalidCombinationSqWithoutHnsw() { SQBuildParams sq = new SQBuildParams.Builder().build(); assertThrows(IllegalArgumentException.class, () -> { - new VectorIndexParams.Builder(ivf) - .setDistanceType(DistanceType.L2) - .setSqParams(sq) - .build(); + new VectorIndexParams.Builder(ivf).setDistanceType(DistanceType.L2).setSqParams(sq).build(); }); } + + @Test + public void testInvertedIndex() { + JniTestHelper.parseIndexParams(new IndexParams.Builder() + .setInvertedIndexParams(new InvertedIndexParams.Builder().setWithPosition(true).build()) + .build()); + } + + @Test + public void testScalarIndex() { + JniTestHelper.parseIndexParams(new IndexParams.Builder() + .setScalarIndexParams( + new ScalarIndexParams.Builder().setForceIndexType(ScalarIndexType.BITMAP).build()) + .build()); + JniTestHelper.parseIndexParams(new IndexParams.Builder() + .setScalarIndexParams( + new ScalarIndexParams.Builder().setForceIndexType(ScalarIndexType.BTREE).build()) + .build()); + JniTestHelper.parseIndexParams(new IndexParams.Builder() + .setScalarIndexParams( + new ScalarIndexParams.Builder().setForceIndexType(ScalarIndexType.LABEL_LIST).build()) + .build()); + } } From da453d19e6996efc8f1716b2825242e2dc2acc99 Mon Sep 17 00:00:00 2001 From: Lu Qiu Date: Wed, 4 Sep 2024 18:04:30 -0700 Subject: [PATCH 2/5] Support index --- .../com/lancedb/lance/TestVectorDataset.java | 66 +++++---- .../com/lancedb/lance/VectorSearchTest.java | 129 ++++++++++-------- 2 files changed, 107 insertions(+), 88 deletions(-) diff --git a/java/core/src/test/java/com/lancedb/lance/TestVectorDataset.java b/java/core/src/test/java/com/lancedb/lance/TestVectorDataset.java index 564d47dd25..70d463b13a 100644 --- a/java/core/src/test/java/com/lancedb/lance/TestVectorDataset.java +++ b/java/core/src/test/java/com/lancedb/lance/TestVectorDataset.java @@ -1,15 +1,13 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package com.lancedb.lance; @@ -28,6 +26,7 @@ import com.lancedb.lance.index.DistanceType; import com.lancedb.lance.index.IndexParams; import com.lancedb.lance.index.IndexType; +import com.lancedb.lance.index.ScalarIndexParams; import com.lancedb.lance.index.vector.VectorIndexParams; import java.io.IOException; @@ -36,7 +35,8 @@ public class TestVectorDataset implements AutoCloseable { public static final String vectorColumnName = "vec"; - public static final String indexName = "idx"; + public static final String vectorIndexName = "vector_index"; + public static final String scalarIndexName = "scalar_index"; private final Path datasetPath; private Schema schema; private BufferAllocator allocator; @@ -55,21 +55,20 @@ private Schema createSchema() { Map metadata = new HashMap<>(); metadata.put("dataset", "vector"); - List fields = Arrays.asList( - new Field("i", FieldType.nullable(new ArrowType.Int(32, true)), null), - new Field("s", FieldType.nullable(new ArrowType.Utf8()), null), - new Field(vectorColumnName, FieldType.nullable(new ArrowType.FixedSizeList(32)), - Collections.singletonList(new Field("item", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null)))); + List fields = + Arrays.asList(new Field("i", FieldType.nullable(new ArrowType.Int(32, true)), null), + new Field("s", FieldType.nullable(new ArrowType.Utf8()), null), + new Field(vectorColumnName, FieldType.nullable(new ArrowType.FixedSizeList(32)), + Collections.singletonList(new Field("item", + FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), + null)))); return new Schema(fields, metadata); } private Dataset createDataset() throws IOException { - WriteParams writeParams = new WriteParams.Builder() - .withMaxRowsPerGroup(10) - .withMaxRowsPerFile(200) - .build(); + WriteParams writeParams = + new WriteParams.Builder().withMaxRowsPerGroup(10).withMaxRowsPerFile(200).build(); Dataset.create(allocator, datasetPath.toString(), schema, writeParams).close(); @@ -102,7 +101,8 @@ private FragmentMetadata createFragment(int batchIndex) throws IOException { root.setRowCount(80); WriteParams fragmentWriteParams = new WriteParams.Builder().build(); - return Fragment.create(datasetPath.toString(), allocator, root, Optional.of(batchIndex), fragmentWriteParams); + return Fragment.create(datasetPath.toString(), allocator, root, Optional.of(batchIndex), + fragmentWriteParams); } } @@ -127,18 +127,26 @@ public Dataset appendNewData() throws IOException { root.setRowCount(10); WriteParams writeParams = new WriteParams.Builder().build(); - fragmentMetadata = Fragment.create(datasetPath.toString(), allocator, root, Optional.empty(), - writeParams); + fragmentMetadata = + Fragment.create(datasetPath.toString(), allocator, root, Optional.empty(), writeParams); } - FragmentOperation.Append appendOp = new FragmentOperation.Append(Collections.singletonList(fragmentMetadata)); + FragmentOperation.Append appendOp = + new FragmentOperation.Append(Collections.singletonList(fragmentMetadata)); return Dataset.commit(allocator, datasetPath.toString(), appendOp, Optional.of(2L)); } - public void createIndex(Dataset dataset) { + public void createVectorIndex(Dataset dataset) { IndexParams params = new IndexParams.Builder() - .setVectorIndexParams(VectorIndexParams.ivfPq(2, 8, 2, DistanceType.L2, 2)) - .build(); - dataset.createIndex(Arrays.asList(vectorColumnName), IndexType.VECTOR, Optional.of(indexName), params, true); + .setVectorIndexParams(VectorIndexParams.ivfPq(2, 8, 2, DistanceType.L2, 2)).build(); + dataset.createIndex(Arrays.asList(vectorColumnName), IndexType.VECTOR, + Optional.of(vectorIndexName), params, true); + } + + public void createScalarIndex(Dataset dataset) { + IndexParams params = new IndexParams.Builder() + .setScalarIndexParams(new ScalarIndexParams.Builder().build()).build(); + dataset.createIndex(Arrays.asList("i"), IndexType.SCALAR, Optional.of(scalarIndexName), params, + true); } @Override @@ -147,4 +155,4 @@ public void close() { allocator.close(); } } -} \ No newline at end of file +} diff --git a/java/core/src/test/java/com/lancedb/lance/VectorSearchTest.java b/java/core/src/test/java/com/lancedb/lance/VectorSearchTest.java index 914d9e1505..42fc513132 100644 --- a/java/core/src/test/java/com/lancedb/lance/VectorSearchTest.java +++ b/java/core/src/test/java/com/lancedb/lance/VectorSearchTest.java @@ -1,15 +1,13 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package com.lancedb.lance; @@ -34,7 +32,7 @@ import java.util.Set; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.params.provider.CsvSource; import static org.junit.jupiter.api.Assertions.*; @@ -42,9 +40,9 @@ // // The dataset has the following columns: // -// i - i32 : [0, 1, ..., 399] -// s - &str : ["s-0", "s-1", ..., "s-399"] -// vec - [f32; 32]: [[0, 1, ... 31], [32, ..., 63], ... [..., (80 * 5 * 32) - 1]] +// i - i32 : [0, 1, ..., 399] +// s - &str : ["s-0", "s-1", ..., "s-399"] +// vec - [f32; 32]: [[0, 1, ... 31], [32, ..., 63], ... [..., (80 * 5 * 32) - 1]] // // An IVF-PQ index with 2 partitions is trained on this data public class VectorSearchTest { @@ -52,13 +50,27 @@ public class VectorSearchTest { Path tempDir; @Test - void test_create_index() throws Exception { - try (TestVectorDataset testVectorDataset = new TestVectorDataset(tempDir.resolve("test_create_index"))) { + void test_create_vector_index() throws Exception { + try (TestVectorDataset testVectorDataset = + new TestVectorDataset(tempDir.resolve("test_create_vector_index"))) { try (Dataset dataset = testVectorDataset.create()) { - testVectorDataset.createIndex(dataset); + testVectorDataset.createVectorIndex(dataset); List indexes = dataset.listIndexes(); assertEquals(1, indexes.size()); - assertEquals(TestVectorDataset.indexName, indexes.get(0)); + assertEquals(TestVectorDataset.vectorIndexName, indexes.get(0)); + } + } + } + + @Test + void test_create_scalar_index() throws Exception { + try (TestVectorDataset testVectorDataset = + new TestVectorDataset(tempDir.resolve("test_create_scalar_index"))) { + try (Dataset dataset = testVectorDataset.create()) { + testVectorDataset.createScalarIndex(dataset); + List indexes = dataset.listIndexes(); + assertEquals(1, indexes.size()); + assertEquals(TestVectorDataset.scalarIndexName, indexes.get(0)); } } } @@ -68,50 +80,50 @@ void test_create_index() throws Exception { // Directly panic instead of throwing an exception // @Test // void search_invalid_vector() throws Exception { - // try (TestVectorDataset testVectorDataset = new TestVectorDataset(tempDir.resolve("test_create_index"))) { - // try (Dataset dataset = testVectorDataset.create()) { - // float[] key = new float[30]; - // for (int i = 0; i < 30; i++) { - // key[i] = (float) (i + 30); - // } - // ScanOptions options = new ScanOptions.Builder() - // .nearest(new Query.Builder() - // .setColumn(TestVectorDataset.vectorColumnName) - // .setKey(key) - // .setK(5) - // .setUseIndex(false) - // .build()) - // .build(); - // assertThrows(IllegalArgumentException.class, () -> { - // try (Scanner scanner = dataset.newScan(options)) { - // try (ArrowReader reader = scanner.scanBatches()) { - // } - // } - // }); - // } - // } + // try (TestVectorDataset testVectorDataset = new + // TestVectorDataset(tempDir.resolve("test_create_index"))) { + // try (Dataset dataset = testVectorDataset.create()) { + // float[] key = new float[30]; + // for (int i = 0; i < 30; i++) { + // key[i] = (float) (i + 30); + // } + // ScanOptions options = new ScanOptions.Builder() + // .nearest(new Query.Builder() + // .setColumn(TestVectorDataset.vectorColumnName) + // .setKey(key) + // .setK(5) + // .setUseIndex(false) + // .build()) + // .build(); + // assertThrows(IllegalArgumentException.class, () -> { + // try (Scanner scanner = dataset.newScan(options)) { + // try (ArrowReader reader = scanner.scanBatches()) { + // } + // } + // }); + // } + // } // } @ParameterizedTest - @ValueSource(booleans = { false, true }) - void test_knn(boolean createVectorIndex) throws Exception { + @CsvSource({"false, false", "false, true", "true, false", "true, true"}) + void test_knn(boolean createVectorIndex, boolean createScalarIndex) throws Exception { try (TestVectorDataset testVectorDataset = new TestVectorDataset(tempDir.resolve("test_knn"))) { try (Dataset dataset = testVectorDataset.create()) { if (createVectorIndex) { - testVectorDataset.createIndex(dataset); + testVectorDataset.createVectorIndex(dataset); + } + if (createScalarIndex) { + testVectorDataset.createScalarIndex(dataset); } float[] key = new float[32]; for (int i = 0; i < 32; i++) { key[i] = (float) (i + 32); } ScanOptions options = new ScanOptions.Builder() - .nearest(new Query.Builder() - .setColumn(TestVectorDataset.vectorColumnName) - .setKey(key) - .setK(5) - .setUseIndex(false) - .build()) + .nearest(new Query.Builder().setColumn(TestVectorDataset.vectorColumnName).setKey(key) + .setK(5).setUseIndex(false).build()) .build(); try (Scanner scanner = dataset.newScan(options)) { try (ArrowReader reader = scanner.scanBatches()) { @@ -124,7 +136,8 @@ void test_knn(boolean createVectorIndex) throws Exception { assertEquals(4, root.getSchema().getFields().size(), "Expected 4 columns"); assertEquals("i", root.getSchema().getFields().get(0).getName()); assertEquals("s", root.getSchema().getFields().get(1).getName()); - assertEquals(TestVectorDataset.vectorColumnName, root.getSchema().getFields().get(2).getName()); + assertEquals(TestVectorDataset.vectorColumnName, + root.getSchema().getFields().get(2).getName()); assertEquals("_distance", root.getSchema().getFields().get(3).getName()); IntVector iVector = (IntVector) root.getVector("i"); @@ -152,9 +165,10 @@ void test_knn(boolean createVectorIndex) throws Exception { @Test void test_knn_with_new_data() throws Exception { - try (TestVectorDataset testVectorDataset = new TestVectorDataset(tempDir.resolve("test_knn_with_new_data"))) { + try (TestVectorDataset testVectorDataset = + new TestVectorDataset(tempDir.resolve("test_knn_with_new_data"))) { try (Dataset dataset = testVectorDataset.create()) { - testVectorDataset.createIndex(dataset); + testVectorDataset.createVectorIndex(dataset); } float[] key = new float[32]; @@ -168,7 +182,7 @@ void test_knn_with_new_data() throws Exception { for (Optional filter : filters) { for (Optional limit : limits) { - for (boolean useIndex : new boolean[] { true, false }) { + for (boolean useIndex : new boolean[] {true, false}) { cases.add(new TestCase(filter, limit, useIndex)); } } @@ -178,12 +192,8 @@ void test_knn_with_new_data() throws Exception { try (Dataset dataset = testVectorDataset.appendNewData()) { for (TestCase testCase : cases) { ScanOptions.Builder optionsBuilder = new ScanOptions.Builder() - .nearest(new Query.Builder() - .setColumn(TestVectorDataset.vectorColumnName) - .setKey(key) - .setK(k) - .setUseIndex(testCase.useIndex) - .build()); + .nearest(new Query.Builder().setColumn(TestVectorDataset.vectorColumnName).setKey(key) + .setK(k).setUseIndex(testCase.useIndex).build()); testCase.filter.ifPresent(optionsBuilder::filter); testCase.limit.ifPresent(optionsBuilder::limit); @@ -207,7 +217,8 @@ void test_knn_with_new_data() throws Exception { // Top one should be the first value of new data IntVector iVector = (IntVector) root.getVector("i"); - assertEquals(400, iVector.get(0), "First result should be the first value of new data"); + assertEquals(400, iVector.get(0), + "First result should be the first value of new data"); // Check if distances are in ascending order Float4Vector distanceVector = (Float4Vector) root.getVector("_distance"); From 47c3796f7c0523a97a5ad328af38f4f0b0ea43ec Mon Sep 17 00:00:00 2001 From: Lu Qiu Date: Wed, 4 Sep 2024 21:00:25 -0700 Subject: [PATCH 3/5] Add ScalarIndexParams and InvertedIndexParams --- .../lance/index/InvertedIndexParams.java | 60 +++++++++++++++++ .../lance/index/ScalarIndexParams.java | 64 +++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 java/core/src/main/java/com/lancedb/lance/index/InvertedIndexParams.java create mode 100644 java/core/src/main/java/com/lancedb/lance/index/ScalarIndexParams.java diff --git a/java/core/src/main/java/com/lancedb/lance/index/InvertedIndexParams.java b/java/core/src/main/java/com/lancedb/lance/index/InvertedIndexParams.java new file mode 100644 index 0000000000..84c6bcaed3 --- /dev/null +++ b/java/core/src/main/java/com/lancedb/lance/index/InvertedIndexParams.java @@ -0,0 +1,60 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.lancedb.lance.index; + +import org.apache.commons.lang3.builder.ToStringBuilder; + +/** + * Parameters for building an Inverted Index. This determines how the index is constructed and what + * information it stores. + */ +public class InvertedIndexParams { + private final boolean withPosition; + + private InvertedIndexParams(Builder builder) { + this.withPosition = builder.withPosition; + } + + public static class Builder { + private boolean withPosition = true; + + /** + * Create a new builder for Inverted Index parameters. + */ + public Builder() {} + + /** + * @param withPosition if true, store the position of the term in the document. This can + * significantly increase the size of the index. If false, only store the frequency of + * the term in the document. + * @return Builder + */ + public Builder setWithPosition(boolean withPosition) { + this.withPosition = withPosition; + return this; + } + + public InvertedIndexParams build() { + return new InvertedIndexParams(this); + } + } + + public boolean isWithPosition() { + return withPosition; + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("withPosition", withPosition).toString(); + } +} diff --git a/java/core/src/main/java/com/lancedb/lance/index/ScalarIndexParams.java b/java/core/src/main/java/com/lancedb/lance/index/ScalarIndexParams.java new file mode 100644 index 0000000000..2fffd2b006 --- /dev/null +++ b/java/core/src/main/java/com/lancedb/lance/index/ScalarIndexParams.java @@ -0,0 +1,64 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.lancedb.lance.index; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import java.util.Optional; + +/** + * Parameters for building a Scalar Index. This determines how the index is constructed and what + * information it stores. + */ +public class ScalarIndexParams { + private final Optional forceIndexType; + + private ScalarIndexParams(Builder builder) { + this.forceIndexType = builder.forceIndexType; + } + + public static class Builder { + private Optional forceIndexType = Optional.empty(); + + /** + * Create a new builder for Scalar Index parameters. + */ + public Builder() {} + + /** + * @param forceIndexType if set, always use the given index type and skip auto-detection. + * @return Builder + */ + public Builder setForceIndexType(ScalarIndexType forceIndexType) { + this.forceIndexType = Optional.ofNullable(forceIndexType); + return this; + } + + public ScalarIndexParams build() { + return new ScalarIndexParams(this); + } + } + + public Optional getForceIndexType() { + return forceIndexType; + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("forceIndexType", forceIndexType.orElse(null)) + .toString(); + } + + public enum ScalarIndexType { + BTREE, BITMAP, LABEL_LIST, INVERTED + } +} From 5e500c1aabe11eee915472263038dcc890ac8b48 Mon Sep 17 00:00:00 2001 From: Lu Qiu Date: Thu, 5 Sep 2024 09:33:55 -0700 Subject: [PATCH 4/5] Address cargo clippy --- java/core/lance-jni/src/utils.rs | 12 ++++++------ rust/lance-index/src/scalar/inverted/index.rs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/java/core/lance-jni/src/utils.rs b/java/core/lance-jni/src/utils.rs index b25d9670d3..bf7df9d0ce 100644 --- a/java/core/lance-jni/src/utils.rs +++ b/java/core/lance-jni/src/utils.rs @@ -247,8 +247,8 @@ pub fn get_index_params( None }; - if vector_index_params_option.is_some() { - return Ok(Box::new(vector_index_params_option.unwrap()) as Box); + if let Some(params) = vector_index_params_option { + return Ok(Box::new(params) as Box); } let scalar_index_params_option_object = env @@ -299,8 +299,8 @@ pub fn get_index_params( None }; - if scalar_index_params_option.is_some() { - return Ok(Box::new(scalar_index_params_option.unwrap()) as Box); + if let Some(params) = scalar_index_params_option { + return Ok(Box::new(params) as Box); } let inverted_index_params_option_object = env @@ -337,8 +337,8 @@ pub fn get_index_params( None }; - if inverted_index_params_option.is_some() { - return Ok(Box::new(inverted_index_params_option.unwrap()) as Box); + if let Some(params) = inverted_index_params_option { + return Ok(Box::new(params) as Box); } Err(Error::input_error( diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs index 6c31ad9f2f..3daf18506c 100644 --- a/rust/lance-index/src/scalar/inverted/index.rs +++ b/rust/lance-index/src/scalar/inverted/index.rs @@ -860,5 +860,5 @@ pub fn collect_tokens(text: &str) -> Vec { } pub fn is_phrase_query(query: &str) -> bool { - query.starts_with("\"") && query.ends_with("\"") + query.starts_with('\"') && query.ends_with('\"') } From 30b34959f36a2f51849f830c26ddf1f1d8ac85af Mon Sep 17 00:00:00 2001 From: Lu Qiu Date: Tue, 10 Sep 2024 08:28:04 -0700 Subject: [PATCH 5/5] Fix checkstyle --- .../main/java/com/lancedb/lance/index/IndexParams.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java b/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java index 3d2d06bd6e..ad736f4a5c 100644 --- a/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java +++ b/java/core/src/main/java/com/lancedb/lance/index/IndexParams.java @@ -38,10 +38,12 @@ public static class Builder { private Optional scalarIndexParams = Optional.empty(); private Optional invertedIndexParams = Optional.empty(); - public Builder() {} + public Builder() { + } /** - * Set the distance type for calculating the distance between vectors. Default to L2. + * Set the distance type for calculating the distance between vectors. Default + * to L2. * * @param distanceType distance type * @return this builder @@ -53,7 +55,7 @@ public Builder setDistanceType(DistanceType distanceType) { /** * Vector index parameters for creating a vector index. - * + * * @param vectorIndexParams vector index parameters * @return this builder */