From cf9c5c5be576b99819cb316c6cce4822e9fc0dda Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Thu, 28 Mar 2024 18:21:09 -0500 Subject: [PATCH] fix: use the fair spill pool instead of the greedy spill pool (#2126) Right now we use the greedy spill pool when we use spilling for btree index training. This is because the btree index plan is: ``` Input Stream -> Sort -> SortPreservingMerge -> Output Input Stream (old data) -/ ``` My thought was that `Sort` was the only spillable operator and we could use the greedy pool. It turns out that `SortPreservingMerge` is also spillable and we can't get away with the greedy pool. --- rust/lance-datafusion/src/exec.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rust/lance-datafusion/src/exec.rs b/rust/lance-datafusion/src/exec.rs index 8872d8a6dc..844afb4b6c 100644 --- a/rust/lance-datafusion/src/exec.rs +++ b/rust/lance-datafusion/src/exec.rs @@ -23,7 +23,7 @@ use datafusion::{ execution::{ context::{SessionConfig, SessionContext, SessionState}, disk_manager::DiskManagerConfig, - memory_pool::GreedyMemoryPool, + memory_pool::FairSpillPool, runtime_env::{RuntimeConfig, RuntimeEnv}, TaskContext, }, @@ -175,9 +175,8 @@ pub fn execute_plan( let mut runtime_config = RuntimeConfig::new(); if options.use_spilling { runtime_config.disk_manager = DiskManagerConfig::NewOs; - runtime_config.memory_pool = Some(Arc::new(GreedyMemoryPool::new( - options.mem_pool_size as usize, - ))); + runtime_config.memory_pool = + Some(Arc::new(FairSpillPool::new(options.mem_pool_size as usize))); } let runtime_env = Arc::new(RuntimeEnv::new(runtime_config)?); let session_state = SessionState::new_with_config_rt(session_config, runtime_env);