From 0101fea756d3dd486bfbb8031f3b1feb9ae032b9 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 17 Oct 2024 10:13:29 +0800 Subject: [PATCH] add doc for ascii_folding Signed-off-by: BubbleCal --- python/python/lance/dataset.py | 4 ++++ rust/lance-index/src/scalar/inverted/builder.rs | 1 + 2 files changed, 5 insertions(+) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 476ff13d41..a376564909 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -1358,6 +1358,10 @@ def create_scalar_index( remove_stop_words: bool, default False This is for the ``INVERTED`` index. If True, the index will remove stop words. + ascii_folding: bool, default False + This is for the ``INVERTED`` index. If True, the index will convert + non-ASCII characters to their ASCII equivalents where possible. + For example, this removes accents: "é" -> "e". Examples -------- diff --git a/rust/lance-index/src/scalar/inverted/builder.rs b/rust/lance-index/src/scalar/inverted/builder.rs index 6d8890b8fe..6ed1931116 100644 --- a/rust/lance-index/src/scalar/inverted/builder.rs +++ b/rust/lance-index/src/scalar/inverted/builder.rs @@ -517,6 +517,7 @@ impl IndexWorker { Ok(()) } + #[instrument(level = "debug", skip_all)] async fn flush_posting_list(&mut self, token: String) -> Result { if let Some(posting_list) = self.posting_lists.remove(&token) { let size = posting_list.size();