From 746380b303418b25fcc424cbfc1057d6b2c8f0dc Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Mon, 14 Oct 2024 13:44:16 +0200 Subject: [PATCH] Optimize `signum` function (3-25x faster) (#12890) * add bench * optimize signum --- datafusion/functions/Cargo.toml | 5 ++ datafusion/functions/benches/signum.rs | 46 ++++++++++++++++++ datafusion/functions/src/math/signum.rs | 64 ++++++++++++------------- 3 files changed, 81 insertions(+), 34 deletions(-) create mode 100644 datafusion/functions/benches/signum.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 2ffe93a0e567..e08dfb2de07e 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -137,6 +137,11 @@ harness = false name = "to_char" required-features = ["datetime_expressions"] +[[bench]] +harness = false +name = "signum" +required-features = ["math_expressions"] + [[bench]] harness = false name = "substr_index" diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs new file mode 100644 index 000000000000..9f8d8258c823 --- /dev/null +++ b/datafusion/functions/benches/signum.rs @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use arrow::{ + datatypes::{Float32Type, Float64Type}, + util::bench_util::create_primitive_array, +}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::math::signum; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let signum = signum(); + for size in [1024, 4096, 8192] { + let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2)); + let f32_args = vec![ColumnarValue::Array(f32_array)]; + c.bench_function(&format!("signum f32 array: {}", size), |b| { + b.iter(|| black_box(signum.invoke(&f32_args).unwrap())) + }); + let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2)); + let f64_args = vec![ColumnarValue::Array(f64_array)]; + c.bench_function(&format!("signum f64 array: {}", size), |b| { + b.iter(|| black_box(signum.invoke(&f64_args).unwrap())) + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index d2a806a46e13..15b73f930343 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -18,11 +18,11 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, Float32Array, Float64Array}; -use arrow::datatypes::DataType; +use arrow::array::{ArrayRef, AsArray}; use arrow::datatypes::DataType::{Float32, Float64}; +use arrow::datatypes::{DataType, Float32Type, Float64Type}; -use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::ColumnarValue; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; @@ -86,37 +86,33 @@ impl ScalarUDFImpl for SignumFunc { /// signum SQL function pub fn signum(args: &[ArrayRef]) -> Result<ArrayRef> { match args[0].data_type() { - Float64 => 
Ok(Arc::new(make_function_scalar_inputs_return_type!( - &args[0], - "signum", - Float64Array, - Float64Array, - { - |x: f64| { - if x == 0_f64 { - 0_f64 - } else { - x.signum() - } - } - } - )) as ArrayRef), - - Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!( - &args[0], - "signum", - Float32Array, - Float32Array, - { - |x: f32| { - if x == 0_f32 { - 0_f32 - } else { - x.signum() - } - } - } - )) as ArrayRef), + Float64 => Ok(Arc::new( + args[0] + .as_primitive::<Float64Type>() + .unary::<_, Float64Type>( + |x: f64| { + if x == 0_f64 { + 0_f64 + } else { + x.signum() + } + }, + ), + ) as ArrayRef), + + Float32 => Ok(Arc::new( + args[0] + .as_primitive::<Float32Type>() + .unary::<_, Float32Type>( + |x: f32| { + if x == 0_f32 { + 0_f32 + } else { + x.signum() + } + }, + ), + ) as ArrayRef), other => exec_err!("Unsupported data type {other:?} for function signum"), }