diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 476ff13d41..a376564909 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -1358,6 +1358,10 @@ def create_scalar_index( remove_stop_words: bool, default False This is for the ``INVERTED`` index. If True, the index will remove stop words. + ascii_folding: bool, default False + This is for the ``INVERTED`` index. If True, the index will convert + non-ASCII characters to their ASCII equivalents where possible. + For example, this removes accents: "é" -> "e". Examples -------- diff --git a/rust/lance-index/src/scalar/inverted/builder.rs b/rust/lance-index/src/scalar/inverted/builder.rs index 6d8890b8fe..6ed1931116 100644 --- a/rust/lance-index/src/scalar/inverted/builder.rs +++ b/rust/lance-index/src/scalar/inverted/builder.rs @@ -517,6 +517,7 @@ impl IndexWorker { Ok(()) } + #[instrument(level = "debug", skip_all)] async fn flush_posting_list(&mut self, token: String) -> Result { if let Some(posting_list) = self.posting_lists.remove(&token) { let size = posting_list.size();