diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..ef7507bc66 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +d5e444d0a71409ae3701d4249ad877f1fb9e2235 # introduced `rustfmt.toml` and ran formatter; ignoring large formatting changes diff --git a/Cargo.lock b/Cargo.lock index a60b0c6a06..5638f71a8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1758,7 +1758,6 @@ version = "0.3.0-dev0" dependencies = [ "arrow2", "async-compat", - "async-compression", "async-stream", "common-error", "common-py-serde", @@ -1777,7 +1776,6 @@ dependencies = [ "snafu", "tokio", "tokio-util", - "url", ] [[package]] diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000000..d181d8c6e3 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,3 @@ +group_imports = "StdExternalCrate" +ignore = ["src/arrow2", "src/parquet2"] +imports_granularity = "Crate" diff --git a/src/common/arrow-ffi/src/lib.rs b/src/common/arrow-ffi/src/lib.rs index 9176e0eeb9..463d2d05b8 100644 --- a/src/common/arrow-ffi/src/lib.rs +++ b/src/common/arrow-ffi/src/lib.rs @@ -1,7 +1,6 @@ use std::io::Cursor; use arrow2::{array::Array, datatypes::Field, ffi}; - #[cfg(feature = "python")] use pyo3::ffi::Py_uintptr_t; #[cfg(feature = "python")] diff --git a/src/common/daft-config/src/lib.rs b/src/common/daft-config/src/lib.rs index edef9aea9d..dcaef0a2f8 100644 --- a/src/common/daft-config/src/lib.rs +++ b/src/common/daft-config/src/lib.rs @@ -105,15 +105,13 @@ impl DaftExecutionConfig { #[cfg(feature = "python")] mod python; +#[cfg(feature = "python")] +use pyo3::prelude::*; #[cfg(feature = "python")] pub use python::PyDaftExecutionConfig; - #[cfg(feature = "python")] pub use python::PyDaftPlanningConfig; -#[cfg(feature = "python")] -use pyo3::prelude::*; - #[cfg(feature = "python")] pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; diff --git a/src/common/daft-config/src/python.rs b/src/common/daft-config/src/python.rs index 0b187d6a9c..5dda71eda8 100644 --- a/src/common/daft-config/src/python.rs +++ b/src/common/daft-config/src/python.rs @@ -1,11 +1,11 @@ use std::sync::Arc; +use common_io_config::python::IOConfig as PyIOConfig; use common_py_serde::impl_bincode_py_state_serialization; use pyo3::prelude::*; use serde::{Deserialize, Serialize}; use crate::{DaftExecutionConfig, DaftPlanningConfig}; -use common_io_config::python::IOConfig as PyIOConfig; #[derive(Clone, Default, Serialize, Deserialize)] #[pyclass(module = "daft.daft")] diff --git a/src/common/display/src/mermaid.rs b/src/common/display/src/mermaid.rs index f78737f284..41b452b528 100644 --- a/src/common/display/src/mermaid.rs +++ b/src/common/display/src/mermaid.rs @@ -1,6 +1,7 @@ -use indexmap::IndexMap; use std::fmt; +use indexmap::IndexMap; + use crate::{tree::TreeDisplay, DisplayLevel}; pub trait MermaidDisplay: TreeDisplay { diff --git a/src/common/error/src/lib.rs b/src/common/error/src/lib.rs index c4fbd874cf..ef77f241d1 100644 --- a/src/common/error/src/lib.rs +++ b/src/common/error/src/lib.rs @@ -1,5 +1,4 @@ mod error; -pub use error::DaftError; -pub use error::DaftResult; +pub use error::{DaftError, DaftResult}; #[cfg(feature = "python")] mod python; diff --git a/src/common/error/src/python.rs b/src/common/error/src/python.rs index 34c8c3b1fc..b6b4e48523 100644 --- a/src/common/error/src/python.rs +++ b/src/common/error/src/python.rs @@ -1,5 +1,4 @@ -use pyo3::exceptions::PyFileNotFoundError; -use pyo3::import_exception; +use pyo3::{exceptions::PyFileNotFoundError, import_exception}; use crate::DaftError; diff --git a/src/common/file-formats/src/file_format.rs b/src/common/file-formats/src/file_format.rs index 9817ed202d..c5e553aceb 100644 --- a/src/common/file-formats/src/file_format.rs +++ b/src/common/file-formats/src/file_format.rs @@ -8,7 +8,6 @@ use common_error::{DaftError, DaftResult}; use common_py_serde::impl_bincode_py_state_serialization; #[cfg(feature = "python")] use pyo3::prelude::*; - use serde::{Deserialize, Serialize}; /// Format of a file, e.g. Parquet, CSV, JSON. diff --git a/src/common/file-formats/src/file_format_config.rs b/src/common/file-formats/src/file_format_config.rs index 05d3c2ce9a..fe659bc444 100644 --- a/src/common/file-formats/src/file_format_config.rs +++ b/src/common/file-formats/src/file_format_config.rs @@ -1,11 +1,8 @@ -use crate::FileFormat; -use daft_schema::{field::Field, time_unit::TimeUnit}; -use serde::{Deserialize, Serialize}; -use std::hash::Hash; -use std::{collections::BTreeMap, sync::Arc}; +use std::{collections::BTreeMap, hash::Hash, sync::Arc}; use common_py_serde::impl_bincode_py_state_serialization; - +use daft_schema::{field::Field, time_unit::TimeUnit}; +use serde::{Deserialize, Serialize}; #[cfg(feature = "python")] use { common_py_serde::{deserialize_py_object, serialize_py_object}, @@ -13,6 +10,8 @@ use { pyo3::{pyclass, pymethods, types::PyAnyMethods, PyObject, PyResult, Python}, }; +use crate::FileFormat; + /// Configuration for parsing a particular file format. #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum FileFormatConfig { diff --git a/src/common/file-formats/src/lib.rs b/src/common/file-formats/src/lib.rs index 5a06630805..cc349d069c 100644 --- a/src/common/file-formats/src/lib.rs +++ b/src/common/file-formats/src/lib.rs @@ -2,13 +2,12 @@ mod file_format; pub use file_format::FileFormat; mod file_format_config; +#[cfg(feature = "python")] +pub use file_format_config::DatabaseSourceConfig; pub use file_format_config::{ CsvSourceConfig, FileFormatConfig, JsonSourceConfig, ParquetSourceConfig, }; -#[cfg(feature = "python")] -pub use file_format_config::DatabaseSourceConfig; - #[cfg(feature = "python")] pub mod python; diff --git a/src/common/io-config/src/azure.rs b/src/common/io-config/src/azure.rs index c2136f6abb..1aac69e17a 100644 --- a/src/common/io-config/src/azure.rs +++ b/src/common/io-config/src/azure.rs @@ -1,8 +1,6 @@ -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt::{Display, Formatter}; -use serde::Deserialize; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use crate::ObfuscatedString; diff --git a/src/common/io-config/src/config.rs b/src/common/io-config/src/config.rs index 94f97e1cad..7d9ce2230e 100644 --- a/src/common/io-config/src/config.rs +++ b/src/common/io-config/src/config.rs @@ -1,11 +1,8 @@ -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt::{Display, Formatter}; -use serde::Deserialize; -use serde::Serialize; +use serde::{Deserialize, Serialize}; -use crate::HTTPConfig; -use crate::{AzureConfig, GCSConfig, S3Config}; +use crate::{AzureConfig, GCSConfig, HTTPConfig, S3Config}; #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] pub struct IOConfig { pub s3: S3Config, diff --git a/src/common/io-config/src/gcs.rs b/src/common/io-config/src/gcs.rs index 693f5a8a57..cdbf57671d 100644 --- a/src/common/io-config/src/gcs.rs +++ b/src/common/io-config/src/gcs.rs @@ -1,8 +1,6 @@ -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt::{Display, Formatter}; -use serde::Deserialize; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use crate::ObfuscatedString; diff --git a/src/common/io-config/src/http.rs b/src/common/io-config/src/http.rs index 7fb3f38eeb..275c55f106 100644 --- a/src/common/io-config/src/http.rs +++ b/src/common/io-config/src/http.rs @@ -1,8 +1,6 @@ -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt::{Display, Formatter}; -use serde::Deserialize; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use crate::ObfuscatedString; diff --git a/src/common/io-config/src/lib.rs b/src/common/io-config/src/lib.rs index 66cb17774f..b50b4e4185 100644 --- a/src/common/io-config/src/lib.rs +++ b/src/common/io-config/src/lib.rs @@ -16,8 +16,11 @@ use secrecy::{ExposeSecret, Secret}; use serde::{Deserialize, Deserializer, Serialize}; pub use crate::{ - azure::AzureConfig, config::IOConfig, gcs::GCSConfig, http::HTTPConfig, s3::S3Config, - s3::S3Credentials, + azure::AzureConfig, + config::IOConfig, + gcs::GCSConfig, + http::HTTPConfig, + s3::{S3Config, S3Credentials}, }; #[derive(Clone)] diff --git a/src/common/io-config/src/python.rs b/src/common/io-config/src/python.rs index 01ce95f689..ef1276bbc6 100644 --- a/src/common/io-config/src/python.rs +++ b/src/common/io-config/src/python.rs @@ -239,8 +239,7 @@ impl IOConfig { } pub fn __hash__(&self) -> PyResult { - use std::collections::hash_map::DefaultHasher; - use std::hash::Hash; + use std::{collections::hash_map::DefaultHasher, hash::Hash}; let mut hasher = DefaultHasher::new(); self.config.hash(&mut hasher); diff --git a/src/common/io-config/src/s3.rs b/src/common/io-config/src/s3.rs index 1c9ca74c5e..a6e4fc97b5 100644 --- a/src/common/io-config/src/s3.rs +++ b/src/common/io-config/src/s3.rs @@ -1,15 +1,13 @@ +use std::{ + any::Any, + fmt::{Debug, Display, Formatter}, + hash::{Hash, Hasher}, + time::SystemTime, +}; + use aws_credential_types::provider::ProvideCredentials; -use chrono::offset::Utc; -use chrono::DateTime; -use serde::Deserialize; -use serde::Serialize; -use std::any::Any; -use std::fmt::Debug; -use std::fmt::Display; -use std::fmt::Formatter; -use std::hash::Hash; -use std::hash::Hasher; -use std::time::SystemTime; +use chrono::{offset::Utc, DateTime}; +use serde::{Deserialize, Serialize}; pub use crate::ObfuscatedString; diff --git a/src/common/py-serde/src/lib.rs b/src/common/py-serde/src/lib.rs index af7c0cf7ed..4c3aa3387c 100644 --- a/src/common/py-serde/src/lib.rs +++ b/src/common/py-serde/src/lib.rs @@ -1,6 +1,6 @@ mod python; +pub use bincode; + #[cfg(feature = "python")] pub use crate::{python::deserialize_py_object, python::serialize_py_object}; - -pub use bincode; diff --git a/src/common/py-serde/src/python.rs b/src/common/py-serde/src/python.rs index 1dbbd8d97b..63c20a4699 100644 --- a/src/common/py-serde/src/python.rs +++ b/src/common/py-serde/src/python.rs @@ -1,8 +1,12 @@ +use std::fmt; + #[cfg(feature = "python")] use pyo3::{types::PyAnyMethods, PyObject, Python}; - -use serde::{de::Error as DeError, de::Visitor, ser::Error as SerError, Deserializer, Serializer}; -use std::fmt; +use serde::{ + de::{Error as DeError, Visitor}, + ser::Error as SerError, + Deserializer, Serializer, +}; #[cfg(feature = "python")] pub fn serialize_py_object(obj: &PyObject, s: S) -> Result diff --git a/src/common/resource-request/src/lib.rs b/src/common/resource-request/src/lib.rs index c73bf88381..9367a7af06 100644 --- a/src/common/resource-request/src/lib.rs +++ b/src/common/resource-request/src/lib.rs @@ -1,3 +1,8 @@ +use std::{ + hash::{Hash, Hasher}, + ops::Add, +}; + use common_hashable_float_wrapper::FloatWrapper; use common_py_serde::impl_bincode_py_state_serialization; #[cfg(feature = "python")] @@ -8,10 +13,6 @@ use pyo3::{ types::{PyModule, PyModuleMethods}, Bound, PyObject, PyResult, Python, }; - -use std::hash::{Hash, Hasher}; -use std::ops::Add; - use serde::{Deserialize, Serialize}; /// Resource request for a query fragment task. diff --git a/src/common/treenode/src/lib.rs b/src/common/treenode/src/lib.rs index de7ff59c30..68da6c47c8 100644 --- a/src/common/treenode/src/lib.rs +++ b/src/common/treenode/src/lib.rs @@ -939,9 +939,8 @@ impl TreeNode for T { mod tests { use std::fmt::Display; - use crate::Result; use crate::{ - Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, TreeNodeRewriter, + Result, Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, }; diff --git a/src/daft-compression/src/compression.rs b/src/daft-compression/src/compression.rs index 268b1566d9..23ca21df47 100644 --- a/src/daft-compression/src/compression.rs +++ b/src/daft-compression/src/compression.rs @@ -1,8 +1,9 @@ +use std::{path::PathBuf, pin::Pin}; + use async_compression::tokio::bufread::{ BrotliDecoder, BzDecoder, DeflateDecoder, GzipDecoder, LzmaDecoder, XzDecoder, ZlibDecoder, ZstdDecoder, }; -use std::{path::PathBuf, pin::Pin}; use tokio::io::{AsyncBufRead, AsyncRead}; use url::Url; diff --git a/src/daft-core/src/array/boolean.rs b/src/daft-core/src/array/boolean.rs index 7d53c076da..df7084f95f 100644 --- a/src/daft-core/src/array/boolean.rs +++ b/src/daft-core/src/array/boolean.rs @@ -1,6 +1,5 @@ -use crate::datatypes::BooleanArray; - use super::ops::as_arrow::AsArrow; +use crate::datatypes::BooleanArray; impl BooleanArray { pub fn as_bitmap(&self) -> &arrow2::bitmap::Bitmap { diff --git a/src/daft-core/src/array/fixed_size_list_array.rs b/src/daft-core/src/array/fixed_size_list_array.rs index 06d1651874..d265f42929 100644 --- a/src/daft-core/src/array/fixed_size_list_array.rs +++ b/src/daft-core/src/array/fixed_size_list_array.rs @@ -2,10 +2,11 @@ use std::sync::Arc; use common_error::{DaftError, DaftResult}; -use crate::array::growable::{Growable, GrowableArray}; -use crate::datatypes::DataType; -use crate::datatypes::{DaftArrayType, Field}; -use crate::series::Series; +use crate::{ + array::growable::{Growable, GrowableArray}, + datatypes::{DaftArrayType, DataType, Field}, + series::Series, +}; #[derive(Clone, Debug)] pub struct FixedSizeListArray { @@ -222,13 +223,12 @@ impl Iterator for FixedSizeListArrayIter<'_> { mod tests { use common_error::DaftResult; + use super::FixedSizeListArray; use crate::{ datatypes::{DataType, Field, Int32Array}, series::IntoSeries, }; - use super::FixedSizeListArray; - /// Helper that returns a FixedSizeListArray, with each list element at len=3 fn get_i32_fixed_size_list_array(validity: &[bool]) -> FixedSizeListArray { let field = Field::new("foo", DataType::FixedSizeList(Box::new(DataType::Int32), 3)); diff --git a/src/daft-core/src/array/from.rs b/src/daft-core/src/array/from.rs index 24c9edc46f..3ef75a23a7 100644 --- a/src/daft-core/src/array/from.rs +++ b/src/daft-core/src/array/from.rs @@ -1,14 +1,15 @@ -use std::borrow::Cow; -use std::sync::Arc; +use std::{borrow::Cow, sync::Arc}; -use crate::datatypes::{ - BinaryArray, BooleanArray, DaftNumericType, DaftPhysicalType, DataType, Field, - FixedSizeBinaryArray, NullArray, Utf8Array, Utf8Type, -}; - -use crate::array::DataArray; use common_error::{DaftError, DaftResult}; +use crate::{ + array::DataArray, + datatypes::{ + BinaryArray, BooleanArray, DaftNumericType, DaftPhysicalType, DataType, Field, + FixedSizeBinaryArray, NullArray, Utf8Array, Utf8Type, + }, +}; + impl From<(&str, Box>)> for DataArray { diff --git a/src/daft-core/src/array/from_iter.rs b/src/daft-core/src/array/from_iter.rs index df3d1e0bef..484b676f30 100644 --- a/src/daft-core/src/array/from_iter.rs +++ b/src/daft-core/src/array/from_iter.rs @@ -1,7 +1,5 @@ -use crate::array::prelude::*; -use crate::datatypes::prelude::*; - use super::DataArray; +use crate::{array::prelude::*, datatypes::prelude::*}; impl DataArray where diff --git a/src/daft-core/src/array/growable/arrow_growable.rs b/src/daft-core/src/array/growable/arrow_growable.rs index d5ed790a05..4cea3b2569 100644 --- a/src/daft-core/src/array/growable/arrow_growable.rs +++ b/src/daft-core/src/array/growable/arrow_growable.rs @@ -2,9 +2,12 @@ use std::{marker::PhantomData, sync::Arc}; use common_error::DaftResult; -use crate::{array::prelude::*, datatypes::prelude::*, series::IntoSeries, series::Series}; - use super::Growable; +use crate::{ + array::prelude::*, + datatypes::prelude::*, + series::{IntoSeries, Series}, +}; pub struct ArrowBackedDataArrayGrowable< 'a, diff --git a/src/daft-core/src/array/growable/fixed_size_list_growable.rs b/src/daft-core/src/array/growable/fixed_size_list_growable.rs index 6126cf2f0a..fd10c8dd94 100644 --- a/src/daft-core/src/array/growable/fixed_size_list_growable.rs +++ b/src/daft-core/src/array/growable/fixed_size_list_growable.rs @@ -1,15 +1,12 @@ use common_error::DaftResult; +use super::{bitmap_growable::ArrowBitmapGrowable, Growable}; use crate::{ array::{growable::make_growable, FixedSizeListArray}, - datatypes::DataType, - datatypes::Field, - series::IntoSeries, - series::Series, + datatypes::{DataType, Field}, + series::{IntoSeries, Series}, }; -use super::{bitmap_growable::ArrowBitmapGrowable, Growable}; - pub struct FixedSizeListGrowable<'a> { name: String, dtype: DataType, diff --git a/src/daft-core/src/array/growable/list_growable.rs b/src/daft-core/src/array/growable/list_growable.rs index 7dcfaa63bf..25f44761be 100644 --- a/src/daft-core/src/array/growable/list_growable.rs +++ b/src/daft-core/src/array/growable/list_growable.rs @@ -1,15 +1,13 @@ use arrow2::types::Index; use common_error::DaftResult; +use super::{bitmap_growable::ArrowBitmapGrowable, Growable}; use crate::{ array::{growable::make_growable, ListArray}, datatypes::{DataType, Field}, - series::IntoSeries, - series::Series, + series::{IntoSeries, Series}, }; -use super::{bitmap_growable::ArrowBitmapGrowable, Growable}; - pub struct ListGrowable<'a> { name: String, dtype: DataType, diff --git a/src/daft-core/src/array/growable/logical_growable.rs b/src/daft-core/src/array/growable/logical_growable.rs index f1c38a3be1..9e0a0b4425 100644 --- a/src/daft-core/src/array/growable/logical_growable.rs +++ b/src/daft-core/src/array/growable/logical_growable.rs @@ -2,9 +2,12 @@ use std::marker::PhantomData; use common_error::DaftResult; -use crate::{array::prelude::*, datatypes::prelude::*, series::IntoSeries, series::Series}; - use super::{Growable, GrowableArray}; +use crate::{ + array::prelude::*, + datatypes::prelude::*, + series::{IntoSeries, Series}, +}; pub struct LogicalGrowable where diff --git a/src/daft-core/src/array/growable/mod.rs b/src/daft-core/src/array/growable/mod.rs index ae6cf0127e..7db6948af6 100644 --- a/src/daft-core/src/array/growable/mod.rs +++ b/src/daft-core/src/array/growable/mod.rs @@ -1,8 +1,7 @@ use common_error::DaftResult; use crate::{ - array::prelude::*, - array::{FixedSizeListArray, ListArray, StructArray}, + array::{prelude::*, FixedSizeListArray, ListArray, StructArray}, datatypes::prelude::*, series::Series, with_match_daft_types, diff --git a/src/daft-core/src/array/growable/python_growable.rs b/src/daft-core/src/array/growable/python_growable.rs index e6ceda91d8..1427c14c7c 100644 --- a/src/daft-core/src/array/growable/python_growable.rs +++ b/src/daft-core/src/array/growable/python_growable.rs @@ -1,14 +1,12 @@ use std::{mem::swap, sync::Arc}; +use super::Growable; use crate::{ array::{pseudo_arrow::PseudoArrowArray, DataArray}, datatypes::{DataType, Field, PythonArray, PythonType}, - series::IntoSeries, - series::Series, + series::{IntoSeries, Series}, }; -use super::Growable; - pub struct PythonGrowable<'a> { name: String, dtype: DataType, diff --git a/src/daft-core/src/array/growable/struct_growable.rs b/src/daft-core/src/array/growable/struct_growable.rs index cb5db460f8..fb266ebb88 100644 --- a/src/daft-core/src/array/growable/struct_growable.rs +++ b/src/daft-core/src/array/growable/struct_growable.rs @@ -1,15 +1,12 @@ use common_error::DaftResult; +use super::{bitmap_growable::ArrowBitmapGrowable, Growable}; use crate::{ array::{growable::make_growable, StructArray}, - datatypes::DataType, - datatypes::Field, - series::IntoSeries, - series::Series, + datatypes::{DataType, Field}, + series::{IntoSeries, Series}, }; -use super::{bitmap_growable::ArrowBitmapGrowable, Growable}; - pub struct StructGrowable<'a> { name: String, dtype: DataType, diff --git a/src/daft-core/src/array/image_array.rs b/src/daft-core/src/array/image_array.rs index 7422760380..205075efbf 100644 --- a/src/daft-core/src/array/image_array.rs +++ b/src/daft-core/src/array/image_array.rs @@ -2,10 +2,11 @@ use std::vec; use common_error::DaftResult; -use crate::array::prelude::*; -use crate::datatypes::prelude::*; - -use crate::series::{IntoSeries, Series}; +use crate::{ + array::prelude::*, + datatypes::prelude::*, + series::{IntoSeries, Series}, +}; #[derive(Clone)] pub struct BBox(pub u32, pub u32, pub u32, pub u32); diff --git a/src/daft-core/src/array/iterator.rs b/src/daft-core/src/array/iterator.rs index a08bbb33a5..238c4186df 100644 --- a/src/daft-core/src/array/iterator.rs +++ b/src/daft-core/src/array/iterator.rs @@ -1,11 +1,8 @@ use arrow2::bitmap::utils::{BitmapIter, ZipValidity}; +use super::{ops::as_arrow::AsArrow, DataArray}; use crate::datatypes::{BooleanArray, DaftNumericType}; -use super::DataArray; - -use super::ops::as_arrow::AsArrow; - impl<'a, T> IntoIterator for &'a DataArray where T: DaftNumericType, diff --git a/src/daft-core/src/array/list_array.rs b/src/daft-core/src/array/list_array.rs index 8579f68f24..964503b271 100644 --- a/src/daft-core/src/array/list_array.rs +++ b/src/daft-core/src/array/list_array.rs @@ -2,10 +2,11 @@ use std::sync::Arc; use common_error::{DaftError, DaftResult}; -use crate::array::growable::{Growable, GrowableArray}; -use crate::datatypes::DataType; -use crate::datatypes::{DaftArrayType, Field}; -use crate::series::Series; +use crate::{ + array::growable::{Growable, GrowableArray}, + datatypes::{DaftArrayType, DataType, Field}, + series::Series, +}; #[derive(Clone, Debug)] pub struct ListArray { diff --git a/src/daft-core/src/array/mod.rs b/src/daft-core/src/array/mod.rs index 76726e5f20..21a811b403 100644 --- a/src/daft-core/src/array/mod.rs +++ b/src/daft-core/src/array/mod.rs @@ -11,17 +11,16 @@ mod struct_array; use arrow2::bitmap::Bitmap; pub use fixed_size_list_array::FixedSizeListArray; pub use list_array::ListArray; - pub use struct_array::StructArray; mod boolean; mod from_iter; pub mod prelude; use std::{marker::PhantomData, sync::Arc}; -use crate::datatypes::{DaftArrayType, DaftPhysicalType, DataType, Field}; - use common_error::{DaftError, DaftResult}; +use crate::datatypes::{DaftArrayType, DaftPhysicalType, DataType, Field}; + #[derive(Debug)] pub struct DataArray { pub field: Arc, diff --git a/src/daft-core/src/array/ops/abs.rs b/src/daft-core/src/array/ops/abs.rs index 931b5b241b..13c35cfd30 100644 --- a/src/daft-core/src/array/ops/abs.rs +++ b/src/daft-core/src/array/ops/abs.rs @@ -1,9 +1,8 @@ +use common_error::DaftResult; use num_traits::Signed; use crate::{array::DataArray, datatypes::DaftNumericType}; -use common_error::DaftResult; - impl DataArray where T::Native: Signed, diff --git a/src/daft-core/src/array/ops/apply.rs b/src/daft-core/src/array/ops/apply.rs index 4f62986544..159232e602 100644 --- a/src/daft-core/src/array/ops/apply.rs +++ b/src/daft-core/src/array/ops/apply.rs @@ -1,12 +1,10 @@ use std::iter::zip; use arrow2::array::PrimitiveArray; - -use crate::{array::DataArray, datatypes::DaftNumericType, utils::arrow::arrow_bitmap_and_helper}; - use common_error::{DaftError, DaftResult}; use super::full::FullNull; +use crate::{array::DataArray, datatypes::DaftNumericType, utils::arrow::arrow_bitmap_and_helper}; impl DataArray where diff --git a/src/daft-core/src/array/ops/approx_count_distinct.rs b/src/daft-core/src/array/ops/approx_count_distinct.rs index eedba91808..068275d2ed 100644 --- a/src/daft-core/src/array/ops/approx_count_distinct.rs +++ b/src/daft-core/src/array/ops/approx_count_distinct.rs @@ -3,10 +3,11 @@ use std::collections::HashSet; use arrow2::array::PrimitiveArray; use common_error::DaftResult; -use crate::array::ops::as_arrow::AsArrow; -use crate::array::ops::DaftApproxCountDistinctAggable; -use crate::datatypes::UInt64Array; -use crate::utils::identity_hash_set::IdentityBuildHasher; +use crate::{ + array::ops::{as_arrow::AsArrow, DaftApproxCountDistinctAggable}, + datatypes::UInt64Array, + utils::identity_hash_set::IdentityBuildHasher, +}; impl DaftApproxCountDistinctAggable for UInt64Array { type Output = DaftResult; diff --git a/src/daft-core/src/array/ops/approx_sketch.rs b/src/daft-core/src/array/ops/approx_sketch.rs index f902bc88b2..acb1b8bb5c 100644 --- a/src/daft-core/src/array/ops/approx_sketch.rs +++ b/src/daft-core/src/array/ops/approx_sketch.rs @@ -1,12 +1,13 @@ -use super::as_arrow::AsArrow; -use super::from_arrow::FromArrow; -use super::DaftApproxSketchAggable; -use crate::array::ops::GroupIndices; -use crate::{array::StructArray, datatypes::*}; use arrow2::array::Array; use common_error::DaftResult; use sketches_ddsketch::{Config, DDSketch}; +use super::{as_arrow::AsArrow, from_arrow::FromArrow, DaftApproxSketchAggable}; +use crate::{ + array::{ops::GroupIndices, StructArray}, + datatypes::*, +}; + impl DaftApproxSketchAggable for &DataArray { type Output = DaftResult; diff --git a/src/daft-core/src/array/ops/arange.rs b/src/daft-core/src/array/ops/arange.rs index 1145e8d9f0..33da976928 100644 --- a/src/daft-core/src/array/ops/arange.rs +++ b/src/daft-core/src/array/ops/arange.rs @@ -1,10 +1,10 @@ +use common_error::DaftResult; + use crate::{ array::DataArray, datatypes::{DaftNumericType, Int64Array}, }; -use common_error::DaftResult; - impl DataArray where T: DaftNumericType, diff --git a/src/daft-core/src/array/ops/arithmetic.rs b/src/daft-core/src/array/ops/arithmetic.rs index bb3cb2aabd..aa6b067f78 100644 --- a/src/daft-core/src/array/ops/arithmetic.rs +++ b/src/daft-core/src/array/ops/arithmetic.rs @@ -1,18 +1,15 @@ use std::ops::{Add, Div, Mul, Rem, Sub}; use arrow2::{array::PrimitiveArray, compute::arithmetics::basic}; +use common_error::{DaftError, DaftResult}; +use super::{as_arrow::AsArrow, full::FullNull}; use crate::{ array::{DataArray, FixedSizeListArray}, - datatypes::DataType, - datatypes::{DaftNumericType, Field, Float64Array, Int64Array, Utf8Array}, + datatypes::{DaftNumericType, DataType, Field, Float64Array, Int64Array, Utf8Array}, kernels::utf8::add_utf8_arrays, series::Series, }; - -use common_error::{DaftError, DaftResult}; - -use super::{as_arrow::AsArrow, full::FullNull}; /// Helper function to perform arithmetic operations on a DataArray /// Takes both Kernel (array x array operation) and operation (scalar x scalar) functions /// The Kernel is used for when both arrays are non-unit length and the operation is used when broadcasting diff --git a/src/daft-core/src/array/ops/arrow2/comparison.rs b/src/daft-core/src/array/ops/arrow2/comparison.rs index 0dcb4ffcea..37f7b2a37b 100644 --- a/src/daft-core/src/array/ops/arrow2/comparison.rs +++ b/src/daft-core/src/array/ops/arrow2/comparison.rs @@ -3,13 +3,13 @@ use arrow2::{ datatypes::DataType, error::Result, }; +use common_error::DaftResult; use num_traits::Float; use crate::{ kernels::search_sorted::{build_is_valid, cmp_float}, series::Series, }; -use common_error::DaftResult; fn build_is_equal_float( left: &dyn Array, diff --git a/src/daft-core/src/array/ops/arrow2/sort/primitive/common.rs b/src/daft-core/src/array/ops/arrow2/sort/primitive/common.rs index 3ca5c1d30e..0c35e1180f 100644 --- a/src/daft-core/src/array/ops/arrow2/sort/primitive/common.rs +++ b/src/daft-core/src/array/ops/arrow2/sort/primitive/common.rs @@ -1,5 +1,8 @@ -use arrow2::array::ord::DynComparator; -use arrow2::{array::PrimitiveArray, bitmap::Bitmap, types::Index}; +use arrow2::{ + array::{ord::DynComparator, PrimitiveArray}, + bitmap::Bitmap, + types::Index, +}; pub fn idx_sort( validity: Option<&Bitmap>, diff --git a/src/daft-core/src/array/ops/arrow2/sort/primitive/sort.rs b/src/daft-core/src/array/ops/arrow2/sort/primitive/sort.rs index 92a0f33580..8535b8ca7c 100644 --- a/src/daft-core/src/array/ops/arrow2/sort/primitive/sort.rs +++ b/src/daft-core/src/array/ops/arrow2/sort/primitive/sort.rs @@ -16,15 +16,14 @@ // specific language governing permissions and limitations // under the License. use arrow2::bitmap::Bitmap; -use arrow2::buffer::Buffer; use arrow2::{ array::PrimitiveArray, bitmap::{utils::SlicesIterator, MutableBitmap}, + buffer::Buffer, + compute::sort::SortOptions, types::NativeType, }; -use arrow2::compute::sort::SortOptions; - /// # Safety /// `indices[i] < values.len()` for all i #[inline] @@ -163,11 +162,12 @@ where #[cfg(test)] mod tests { - use super::*; + use arrow2::{ + array::{ord, Array}, + datatypes::DataType, + }; - use arrow2::array::ord; - use arrow2::array::Array; - use arrow2::datatypes::DataType; + use super::*; fn test_sort_primitive_arrays( data: &[Option], diff --git a/src/daft-core/src/array/ops/as_arrow.rs b/src/daft-core/src/array/ops/as_arrow.rs index 07302c8161..51ba52dd2c 100644 --- a/src/daft-core/src/array/ops/as_arrow.rs +++ b/src/daft-core/src/array/ops/as_arrow.rs @@ -1,5 +1,9 @@ use arrow2::array; +#[cfg(feature = "python")] +use crate::array::pseudo_arrow::PseudoArrowArray; +#[cfg(feature = "python")] +use crate::datatypes::PythonArray; use crate::{ array::DataArray, datatypes::{ @@ -8,11 +12,6 @@ use crate::{ }, }; -#[cfg(feature = "python")] -use crate::array::pseudo_arrow::PseudoArrowArray; -#[cfg(feature = "python")] -use crate::datatypes::PythonArray; - pub trait AsArrow { type Output; diff --git a/src/daft-core/src/array/ops/between.rs b/src/daft-core/src/array/ops/between.rs index e9903d8211..90c22d2151 100644 --- a/src/daft-core/src/array/ops/between.rs +++ b/src/daft-core/src/array/ops/between.rs @@ -1,9 +1,10 @@ +use common_error::{DaftError, DaftResult}; + use super::{DaftBetween, DaftCompare, DaftLogical}; use crate::{ array::DataArray, datatypes::{BooleanArray, DaftNumericType}, }; -use common_error::{DaftError, DaftResult}; impl DaftBetween<&DataArray, &DataArray> for DataArray where @@ -38,9 +39,10 @@ where #[cfg(test)] mod tests { - use crate::{array::ops::DaftBetween, datatypes::Int64Array}; use common_error::DaftResult; + use crate::{array::ops::DaftBetween, datatypes::Int64Array}; + #[test] fn test_between_two_arrays_of_same_size() -> DaftResult<()> { let value = Int64Array::arange("value", 1, 4, 1)?; diff --git a/src/daft-core/src/array/ops/bitwise.rs b/src/daft-core/src/array/ops/bitwise.rs index 322378c6e2..6a38eee06a 100644 --- a/src/daft-core/src/array/ops/bitwise.rs +++ b/src/daft-core/src/array/ops/bitwise.rs @@ -1,13 +1,12 @@ -use crate::{ - array::DataArray, - datatypes::{DaftIntegerType, DaftNumericType}, -}; +use std::ops::{BitAnd, BitOr, BitXor}; use common_error::DaftResult; -use std::ops::{BitAnd, BitOr, BitXor}; - use super::DaftLogical; +use crate::{ + array::DataArray, + datatypes::{DaftIntegerType, DaftNumericType}, +}; impl DaftLogical<&DataArray> for DataArray where diff --git a/src/daft-core/src/array/ops/broadcast.rs b/src/daft-core/src/array/ops/broadcast.rs index b45beda8b5..b372ab2ef6 100644 --- a/src/daft-core/src/array/ops/broadcast.rs +++ b/src/daft-core/src/array/ops/broadcast.rs @@ -1,3 +1,6 @@ +use common_error::{DaftError, DaftResult}; + +use super::full::FullNull; use crate::{ array::{ growable::{Growable, GrowableArray}, @@ -6,10 +9,6 @@ use crate::{ datatypes::{DaftArrayType, DaftPhysicalType, DataType}, }; -use common_error::{DaftError, DaftResult}; - -use super::full::FullNull; - pub trait Broadcastable { fn broadcast(&self, num: usize) -> DaftResult where diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index bb995b3667..191354eb68 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -4,29 +4,6 @@ use std::{ sync::Arc, }; -use super::as_arrow::AsArrow; -use crate::{ - array::{ - growable::make_growable, - image_array::ImageArraySidecarData, - ops::{from_arrow::FromArrow, full::FullNull}, - DataArray, FixedSizeListArray, ListArray, StructArray, - }, - datatypes::{ - logical::{ - DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, LogicalArray, MapArray, TensorArray, TimeArray, - TimestampArray, - }, - DaftArrayType, DaftArrowBackedType, DaftLogicalType, DataType, Field, ImageMode, - Int32Array, Int64Array, NullArray, TimeUnit, UInt64Array, Utf8Array, - }, - series::{IntoSeries, Series}, - utils::display::display_time64, -}; - -use common_error::{DaftError, DaftResult}; - use arrow2::{ array::Array, bitmap::utils::SlicesIterator, @@ -36,8 +13,8 @@ use arrow2::{ }, offset::Offsets, }; +use common_error::{DaftError, DaftResult}; use indexmap::IndexMap; - #[cfg(feature = "python")] use { crate::array::pseudo_arrow::PseudoArrowArray, @@ -51,6 +28,27 @@ use { std::iter, }; +use super::as_arrow::AsArrow; +use crate::{ + array::{ + growable::make_growable, + image_array::ImageArraySidecarData, + ops::{from_arrow::FromArrow, full::FullNull}, + DataArray, FixedSizeListArray, ListArray, StructArray, + }, + datatypes::{ + logical::{ + DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, + FixedShapeTensorArray, ImageArray, LogicalArray, MapArray, TensorArray, TimeArray, + TimestampArray, + }, + DaftArrayType, DaftArrowBackedType, DaftLogicalType, DataType, Field, ImageMode, + Int32Array, Int64Array, NullArray, TimeUnit, UInt64Array, Utf8Array, + }, + series::{IntoSeries, Series}, + utils::display::display_time64, +}; + impl DataArray where T: DaftArrowBackedType, @@ -59,8 +57,9 @@ where match dtype { #[cfg(feature = "python")] DataType::Python => { - use crate::python::PySeries; use pyo3::prelude::*; + + use crate::python::PySeries; // Convert something to Python. // Use the existing logic on the Python side of the PyO3 layer @@ -502,9 +501,10 @@ fn append_values_from_numpy< values_vec: &mut Vec, shapes_vec: &mut Vec, ) -> DaftResult<(usize, usize)> { - use daft_schema::python::PyDataType; use std::num::Wrapping; + use daft_schema::python::PyDataType; + let np_dtype = pyarray.getattr(pyo3::intern!(py, "dtype"))?; let datatype = from_numpy_dtype_fn @@ -998,8 +998,9 @@ fn extract_python_like_to_tensor_array< #[cfg(feature = "python")] impl PythonArray { pub fn cast(&self, dtype: &DataType) -> DaftResult { - use crate::python::PySeries; use pyo3::prelude::*; + + use crate::python::PySeries; match dtype { DataType::Python => Ok(self.clone().into_series()), diff --git a/src/daft-core/src/array/ops/cbrt.rs b/src/daft-core/src/array/ops/cbrt.rs index dfb0885474..4bc57640cc 100644 --- a/src/daft-core/src/array/ops/cbrt.rs +++ b/src/daft-core/src/array/ops/cbrt.rs @@ -1,6 +1,5 @@ -use num_traits::Float; - use common_error::DaftResult; +use num_traits::Float; use crate::{array::DataArray, datatypes::DaftNumericType}; diff --git a/src/daft-core/src/array/ops/ceil.rs b/src/daft-core/src/array/ops/ceil.rs index 3d677fff65..521caab8f7 100644 --- a/src/daft-core/src/array/ops/ceil.rs +++ b/src/daft-core/src/array/ops/ceil.rs @@ -1,3 +1,4 @@ +use common_error::DaftResult; use num_traits::Float; use crate::{ @@ -5,8 +6,6 @@ use crate::{ datatypes::{DaftFloatType, DaftNumericType}, }; -use common_error::DaftResult; - impl DataArray where T: DaftNumericType, diff --git a/src/daft-core/src/array/ops/compare_agg.rs b/src/daft-core/src/array/ops/compare_agg.rs index f358e91b7d..0fc139b36d 100644 --- a/src/daft-core/src/array/ops/compare_agg.rs +++ b/src/daft-core/src/array/ops/compare_agg.rs @@ -1,13 +1,11 @@ -use super::full::FullNull; -use super::{DaftCompareAggable, GroupIndices}; +use arrow2::array::{Array, PrimitiveArray}; +use common_error::DaftResult; + +use super::{full::FullNull, DaftCompareAggable, GroupIndices}; use crate::{ array::{ListArray, StructArray}, datatypes::*, }; -use arrow2::array::Array; -use arrow2::array::PrimitiveArray; - -use common_error::DaftResult; fn grouped_cmp_native( data_array: &DataArray, diff --git a/src/daft-core/src/array/ops/comparison.rs b/src/daft-core/src/array/ops/comparison.rs index 711975a341..0f76b338ff 100644 --- a/src/daft-core/src/array/ops/comparison.rs +++ b/src/daft-core/src/array/ops/comparison.rs @@ -1,5 +1,10 @@ +use std::ops::Not; + +use arrow2::{compute::comparison, scalar::PrimitiveScalar}; +use common_error::{DaftError, DaftResult}; use num_traits::{NumCast, ToPrimitive}; +use super::{as_arrow::AsArrow, from_arrow::FromArrow, full::FullNull, DaftCompare, DaftLogical}; use crate::{ array::DataArray, datatypes::{ @@ -9,15 +14,6 @@ use crate::{ utils::arrow::arrow_bitmap_and_helper, }; -use common_error::{DaftError, DaftResult}; - -use std::ops::Not; - -use super::{from_arrow::FromArrow, full::FullNull, DaftCompare, DaftLogical}; - -use super::as_arrow::AsArrow; -use arrow2::{compute::comparison, scalar::PrimitiveScalar}; - impl PartialEq for DataArray where T: DaftArrowBackedType + 'static, @@ -1751,9 +1747,10 @@ impl DaftCompare<&[u8]> for FixedSizeBinaryArray { #[cfg(test)] mod tests { - use crate::{array::ops::DaftCompare, datatypes::Int64Array}; use common_error::DaftResult; + use crate::{array::ops::DaftCompare, datatypes::Int64Array}; + #[test] fn equal_int64_array_with_scalar() -> DaftResult<()> { let array = Int64Array::arange("a", 1, 4, 1)?; diff --git a/src/daft-core/src/array/ops/concat.rs b/src/daft-core/src/array/ops/concat.rs index 597e036637..b83f2b150b 100644 --- a/src/daft-core/src/array/ops/concat.rs +++ b/src/daft-core/src/array/ops/concat.rs @@ -1,10 +1,9 @@ use arrow2::array::Array; - -use crate::{array::DataArray, datatypes::DaftPhysicalType}; use common_error::{DaftError, DaftResult}; #[cfg(feature = "python")] use crate::array::pseudo_arrow::PseudoArrowArray; +use crate::{array::DataArray, datatypes::DaftPhysicalType}; macro_rules! impl_variable_length_concat { ($fn_name:ident, $arrow_type:ty, $create_fn: ident) => { diff --git a/src/daft-core/src/array/ops/concat_agg.rs b/src/daft-core/src/array/ops/concat_agg.rs index ca81098a9b..6713597897 100644 --- a/src/daft-core/src/array/ops/concat_agg.rs +++ b/src/daft-core/src/array/ops/concat_agg.rs @@ -1,19 +1,19 @@ -use crate::array::{ - growable::{make_growable, Growable}, - ListArray, -}; use arrow2::{bitmap::utils::SlicesIterator, offset::OffsetsBuffer, types::Index}; use common_error::DaftResult; use super::{as_arrow::AsArrow, DaftConcatAggable}; +use crate::array::{ + growable::{make_growable, Growable}, + ListArray, +}; #[cfg(feature = "python")] impl DaftConcatAggable for crate::datatypes::PythonArray { type Output = DaftResult; fn concat(&self) -> Self::Output { + use pyo3::{prelude::*, types::PyList}; + use crate::array::pseudo_arrow::PseudoArrowArray; - use pyo3::prelude::*; - use pyo3::types::PyList; let pyobj_vec = self.as_arrow().to_pyobj_vec(); @@ -30,9 +30,9 @@ impl DaftConcatAggable for crate::datatypes::PythonArray { Self::new(self.field().clone().into(), Box::new(arrow_array)) } fn grouped_concat(&self, groups: &super::GroupIndices) -> Self::Output { + use pyo3::{prelude::*, types::PyList}; + use crate::array::pseudo_arrow::PseudoArrowArray; - use pyo3::prelude::*; - use pyo3::types::PyList; let mut result_pylists: Vec = Vec::with_capacity(groups.len()); @@ -154,8 +154,7 @@ mod test { use crate::{ array::{ops::DaftConcatAggable, ListArray}, - datatypes::DataType, - datatypes::{Field, Int64Array}, + datatypes::{DataType, Field, Int64Array}, series::IntoSeries, }; diff --git a/src/daft-core/src/array/ops/count.rs b/src/daft-core/src/array/ops/count.rs index e8074e8ac6..a7548f8584 100644 --- a/src/daft-core/src/array/ops/count.rs +++ b/src/daft-core/src/array/ops/count.rs @@ -1,13 +1,13 @@ use std::{iter::repeat, sync::Arc}; +use common_error::DaftResult; + +use super::{DaftCountAggable, GroupIndices}; use crate::{ array::{ListArray, StructArray}, count_mode::CountMode, datatypes::*, }; -use common_error::DaftResult; - -use super::{DaftCountAggable, GroupIndices}; /// Helper to perform a grouped count on a validity map of type arrow2::bitmap::Bitmap fn grouped_count_arrow_bitmap( diff --git a/src/daft-core/src/array/ops/exp.rs b/src/daft-core/src/array/ops/exp.rs index e59c1f5a5f..b1dc8c6ac3 100644 --- a/src/daft-core/src/array/ops/exp.rs +++ b/src/daft-core/src/array/ops/exp.rs @@ -1,6 +1,5 @@ -use num_traits::Float; - use common_error::DaftResult; +use num_traits::Float; use crate::{array::DataArray, datatypes::DaftNumericType}; diff --git a/src/daft-core/src/array/ops/filter.rs b/src/daft-core/src/array/ops/filter.rs index 292e195b8d..e255b10119 100644 --- a/src/daft-core/src/array/ops/filter.rs +++ b/src/daft-core/src/array/ops/filter.rs @@ -1,17 +1,16 @@ use std::borrow::Cow; +use arrow2::bitmap::utils::SlicesIterator; +use common_error::DaftResult; + +use super::{as_arrow::AsArrow, full::FullNull}; use crate::{ array::{ growable::{Growable, GrowableArray}, DataArray, FixedSizeListArray, ListArray, StructArray, }, - datatypes::DataType, - datatypes::{BooleanArray, DaftArrayType, DaftArrowBackedType}, + datatypes::{BooleanArray, DaftArrayType, DaftArrowBackedType, DataType}, }; -use arrow2::bitmap::utils::SlicesIterator; -use common_error::DaftResult; - -use super::{as_arrow::AsArrow, full::FullNull}; impl DataArray where @@ -29,8 +28,7 @@ impl crate::datatypes::PythonArray { use arrow2::array::Array; use pyo3::PyObject; - use crate::array::pseudo_arrow::PseudoArrowArray; - use crate::datatypes::PythonType; + use crate::{array::pseudo_arrow::PseudoArrowArray, datatypes::PythonType}; let mask = mask.as_arrow(); diff --git a/src/daft-core/src/array/ops/float.rs b/src/daft-core/src/array/ops/float.rs index afd0838563..8d82390d6a 100644 --- a/src/daft-core/src/array/ops/float.rs +++ b/src/daft-core/src/array/ops/float.rs @@ -1,14 +1,11 @@ +use common_error::DaftResult; +use num_traits::Float; + +use super::{as_arrow::AsArrow, DaftIsInf, DaftIsNan, DaftNotNan}; use crate::{ array::DataArray, datatypes::{BooleanArray, BooleanType, DaftFloatType, DaftNumericType, NullType}, }; -use common_error::DaftResult; -use num_traits::Float; - -use super::DaftIsInf; -use super::{DaftIsNan, DaftNotNan}; - -use super::as_arrow::AsArrow; impl DaftIsNan for DataArray where diff --git a/src/daft-core/src/array/ops/floor.rs b/src/daft-core/src/array/ops/floor.rs index 047cbbf98d..df2daa6677 100644 --- a/src/daft-core/src/array/ops/floor.rs +++ b/src/daft-core/src/array/ops/floor.rs @@ -1,3 +1,4 @@ +use common_error::DaftResult; use num_traits::Float; use crate::{ @@ -5,8 +6,6 @@ use crate::{ datatypes::{DaftFloatType, DaftNumericType}, }; -use common_error::DaftResult; - impl DataArray where T: DaftNumericType, diff --git a/src/daft-core/src/array/ops/from_arrow.rs b/src/daft-core/src/array/ops/from_arrow.rs index f32b005299..a635fe6e21 100644 --- a/src/daft-core/src/array/ops/from_arrow.rs +++ b/src/daft-core/src/array/ops/from_arrow.rs @@ -4,9 +4,9 @@ use common_error::{DaftError, DaftResult}; use crate::{ array::{DataArray, FixedSizeListArray, ListArray, StructArray}, - datatypes::DataType, datatypes::{ - logical::LogicalArray, DaftDataType, DaftLogicalType, DaftPhysicalType, Field, FieldRef, + logical::LogicalArray, DaftDataType, DaftLogicalType, DaftPhysicalType, DataType, Field, + FieldRef, }, series::Series, }; diff --git a/src/daft-core/src/array/ops/get.rs b/src/daft-core/src/array/ops/get.rs index 3485e743fb..a9b5a14ae6 100644 --- a/src/daft-core/src/array/ops/get.rs +++ b/src/daft-core/src/array/ops/get.rs @@ -1,3 +1,4 @@ +use super::as_arrow::AsArrow; use crate::{ array::{DataArray, FixedSizeListArray, ListArray}, datatypes::{ @@ -11,8 +12,6 @@ use crate::{ series::Series, }; -use super::as_arrow::AsArrow; - impl DataArray where T: DaftNumericType, @@ -174,8 +173,7 @@ mod tests { use crate::{ array::FixedSizeListArray, - datatypes::DataType, - datatypes::{Field, Int32Array}, + datatypes::{DataType, Field, Int32Array}, series::IntoSeries, }; diff --git a/src/daft-core/src/array/ops/groups.rs b/src/daft-core/src/array/ops/groups.rs index fa8e2c5178..9676ef3a52 100644 --- a/src/daft-core/src/array/ops/groups.rs +++ b/src/daft-core/src/array/ops/groups.rs @@ -1,8 +1,13 @@ -use std::collections::hash_map::Entry::{Occupied, Vacant}; +use std::{ + collections::hash_map::Entry::{Occupied, Vacant}, + hash::Hash, +}; use arrow2::array::Array; +use common_error::DaftResult; use fnv::FnvHashMap; +use super::{as_arrow::AsArrow, IntoGroups}; use crate::{ array::{DataArray, FixedSizeListArray, ListArray, StructArray}, datatypes::{ @@ -10,11 +15,6 @@ use crate::{ Float32Array, Float64Array, NullArray, Utf8Array, }, }; -use common_error::DaftResult; - -use super::{as_arrow::AsArrow, IntoGroups}; - -use std::hash::Hash; /// Given a list of values, return a `(Vec, Vec>)`. /// The sub-vector in the first part of the tuple contains the indices of the unique values. diff --git a/src/daft-core/src/array/ops/hash.rs b/src/daft-core/src/array/ops/hash.rs index 31ecc5e561..278f70ce79 100644 --- a/src/daft-core/src/array/ops/hash.rs +++ b/src/daft-core/src/array/ops/hash.rs @@ -1,3 +1,8 @@ +use arrow2::types::Index; +use common_error::{DaftError, DaftResult}; +use xxhash_rust::xxh3::{xxh3_64, xxh3_64_with_seed}; + +use super::as_arrow::AsArrow; use crate::{ array::{DataArray, FixedSizeListArray, ListArray, StructArray}, datatypes::{ @@ -11,12 +16,6 @@ use crate::{ utils::arrow::arrow_bitmap_and_helper, }; -use arrow2::types::Index; -use common_error::{DaftError, DaftResult}; -use xxhash_rust::xxh3::{xxh3_64, xxh3_64_with_seed}; - -use super::as_arrow::AsArrow; - impl DataArray where T: DaftNumericType, diff --git a/src/daft-core/src/array/ops/hll_merge.rs b/src/daft-core/src/array/ops/hll_merge.rs index 9dc62dfe66..84bae946af 100644 --- a/src/daft-core/src/array/ops/hll_merge.rs +++ b/src/daft-core/src/array/ops/hll_merge.rs @@ -1,12 +1,10 @@ use common_error::DaftResult; +use hyperloglog::HyperLogLog; use crate::{ - array::ops::{as_arrow::AsArrow, DaftHllMergeAggable}, + array::ops::{as_arrow::AsArrow, DaftHllMergeAggable, GroupIndices}, datatypes::{FixedSizeBinaryArray, UInt64Array}, }; -use hyperloglog::HyperLogLog; - -use crate::array::ops::GroupIndices; impl DaftHllMergeAggable for FixedSizeBinaryArray { type Output = DaftResult; diff --git a/src/daft-core/src/array/ops/hll_sketch.rs b/src/daft-core/src/array/ops/hll_sketch.rs index 455e5bfd39..5516d94892 100644 --- a/src/daft-core/src/array/ops/hll_sketch.rs +++ b/src/daft-core/src/array/ops/hll_sketch.rs @@ -1,13 +1,10 @@ -use crate::{ - array::ops::as_arrow::AsArrow, - datatypes::DataType, - datatypes::{FixedSizeBinaryArray, UInt64Array}, -}; -use hyperloglog::{HyperLogLog, NUM_REGISTERS}; - use common_error::DaftResult; +use hyperloglog::{HyperLogLog, NUM_REGISTERS}; -use crate::array::ops::{DaftHllSketchAggable, GroupIndices}; +use crate::{ + array::ops::{as_arrow::AsArrow, DaftHllSketchAggable, GroupIndices}, + datatypes::{DataType, FixedSizeBinaryArray, UInt64Array}, +}; pub const HLL_SKETCH_DTYPE: DataType = DataType::FixedSizeBinary(NUM_REGISTERS); diff --git a/src/daft-core/src/array/ops/if_else.rs b/src/daft-core/src/array/ops/if_else.rs index ac66306295..b92db36528 100644 --- a/src/daft-core/src/array/ops/if_else.rs +++ b/src/daft-core/src/array/ops/if_else.rs @@ -1,12 +1,16 @@ -use crate::array::growable::{Growable, GrowableArray}; -use crate::array::ops::full::FullNull; -use crate::array::{DataArray, FixedSizeListArray, ListArray, StructArray}; -use crate::datatypes::{BooleanArray, DaftPhysicalType}; -use crate::{datatypes::DataType, series::IntoSeries, series::Series}; use arrow2::array::Array; use common_error::DaftResult; use super::as_arrow::AsArrow; +use crate::{ + array::{ + growable::{Growable, GrowableArray}, + ops::full::FullNull, + DataArray, FixedSizeListArray, ListArray, StructArray, + }, + datatypes::{BooleanArray, DaftPhysicalType, DataType}, + series::{IntoSeries, Series}, +}; fn generic_if_else( predicate: &BooleanArray, diff --git a/src/daft-core/src/array/ops/is_in.rs b/src/daft-core/src/array/ops/is_in.rs index d4e5991a7e..304ec2df4b 100644 --- a/src/daft-core/src/array/ops/is_in.rs +++ b/src/daft-core/src/array/ops/is_in.rs @@ -1,10 +1,13 @@ -use crate::{array::prelude::*, array::DataArray, datatypes::prelude::*}; +use std::collections::{BTreeSet, HashSet}; -use super::as_arrow::AsArrow; -use super::{full::FullNull, DaftIsIn}; use common_error::DaftResult; use common_hashable_float_wrapper::FloatWrapper; -use std::collections::{BTreeSet, HashSet}; + +use super::{as_arrow::AsArrow, full::FullNull, DaftIsIn}; +use crate::{ + array::{prelude::*, DataArray}, + datatypes::prelude::*, +}; macro_rules! collect_to_set_and_check_membership { ($self:expr, $rhs:expr) => {{ diff --git a/src/daft-core/src/array/ops/len.rs b/src/daft-core/src/array/ops/len.rs index 6808736c90..4a1d8ff4dc 100644 --- a/src/daft-core/src/array/ops/len.rs +++ b/src/daft-core/src/array/ops/len.rs @@ -1,13 +1,12 @@ -use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, - datatypes::DaftArrowBackedType, -}; use common_error::DaftResult; +use super::as_arrow::AsArrow; #[cfg(feature = "python")] use crate::datatypes::PythonArray; - -use super::as_arrow::AsArrow; +use crate::{ + array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + datatypes::DaftArrowBackedType, +}; impl DataArray where @@ -23,8 +22,7 @@ where #[cfg(feature = "python")] impl PythonArray { pub fn size_bytes(&self) -> DaftResult { - use pyo3::prelude::*; - use pyo3::types::PyList; + use pyo3::{prelude::*, types::PyList}; let vector = self.as_arrow().values().to_vec(); Python::with_gil(|py| { diff --git a/src/daft-core/src/array/ops/list.rs b/src/daft-core/src/array/ops/list.rs index 1fa4c61184..4dd8cee2a8 100644 --- a/src/daft-core/src/array/ops/list.rs +++ b/src/daft-core/src/array/ops/list.rs @@ -1,25 +1,19 @@ -use std::iter::repeat; -use std::sync::Arc; +use std::{iter::repeat, sync::Arc}; -use crate::datatypes::DataType; -use crate::datatypes::{BooleanArray, Field, Int64Array, Utf8Array}; +use arrow2::offset::OffsetsBuffer; +use common_error::DaftResult; + +use super::as_arrow::AsArrow; use crate::{ array::{ growable::{make_growable, Growable}, FixedSizeListArray, ListArray, }, - datatypes::UInt64Array, + count_mode::CountMode, + datatypes::{BooleanArray, DataType, Field, Int64Array, UInt64Array, Utf8Array}, + series::{IntoSeries, Series}, }; -use crate::series::{IntoSeries, Series}; - -use arrow2::offset::OffsetsBuffer; -use common_error::DaftResult; - -use super::as_arrow::AsArrow; - -use crate::count_mode::CountMode; - fn join_arrow_list_of_utf8s( list_element: Option<&dyn arrow2::array::Array>, delimiter_str: &str, diff --git a/src/daft-core/src/array/ops/list_agg.rs b/src/daft-core/src/array/ops/list_agg.rs index 28ea5884a4..6e47d011ac 100644 --- a/src/daft-core/src/array/ops/list_agg.rs +++ b/src/daft-core/src/array/ops/list_agg.rs @@ -1,3 +1,6 @@ +use common_error::DaftResult; + +use super::{as_arrow::AsArrow, DaftListAggable, GroupIndices}; use crate::{ array::{ growable::{Growable, GrowableArray}, @@ -6,9 +9,6 @@ use crate::{ datatypes::DaftArrowBackedType, series::IntoSeries, }; -use common_error::DaftResult; - -use super::{as_arrow::AsArrow, DaftListAggable, GroupIndices}; impl DaftListAggable for DataArray where @@ -63,9 +63,9 @@ impl DaftListAggable for crate::datatypes::PythonArray { type Output = DaftResult; fn list(&self) -> Self::Output { + use pyo3::{prelude::*, types::PyList}; + use crate::array::pseudo_arrow::PseudoArrowArray; - use pyo3::prelude::*; - use pyo3::types::PyList; let pyobj_vec = self.as_arrow().to_pyobj_vec(); @@ -76,9 +76,9 @@ impl DaftListAggable for crate::datatypes::PythonArray { } fn grouped_list(&self, groups: &GroupIndices) -> Self::Output { + use pyo3::{prelude::*, types::PyList}; + use crate::array::pseudo_arrow::PseudoArrowArray; - use pyo3::prelude::*; - use pyo3::types::PyList; let mut result_pylists: Vec = Vec::with_capacity(groups.len()); diff --git a/src/daft-core/src/array/ops/log.rs b/src/daft-core/src/array/ops/log.rs index b1011d166f..2ac57dccde 100644 --- a/src/daft-core/src/array/ops/log.rs +++ b/src/daft-core/src/array/ops/log.rs @@ -1,9 +1,8 @@ +use common_error::DaftResult; use num_traits::Float; use crate::{array::DataArray, datatypes::DaftFloatType}; -use common_error::DaftResult; - impl DataArray where T: DaftFloatType, diff --git a/src/daft-core/src/array/ops/mean.rs b/src/daft-core/src/array/ops/mean.rs index 0398cb16ac..b4b4016bbc 100644 --- a/src/daft-core/src/array/ops/mean.rs +++ b/src/daft-core/src/array/ops/mean.rs @@ -1,15 +1,9 @@ use std::sync::Arc; -use crate::count_mode::CountMode; -use crate::datatypes::*; - use common_error::DaftResult; -use super::{DaftCountAggable, DaftMeanAggable, DaftSumAggable}; - -use super::as_arrow::AsArrow; - -use crate::array::ops::GroupIndices; +use super::{as_arrow::AsArrow, DaftCountAggable, DaftMeanAggable, DaftSumAggable}; +use crate::{array::ops::GroupIndices, count_mode::CountMode, datatypes::*}; impl DaftMeanAggable for &DataArray { type Output = DaftResult>; diff --git a/src/daft-core/src/array/ops/merge_sketch.rs b/src/daft-core/src/array/ops/merge_sketch.rs index f1ea2a4a48..e2bbfcdb2c 100644 --- a/src/daft-core/src/array/ops/merge_sketch.rs +++ b/src/daft-core/src/array/ops/merge_sketch.rs @@ -1,9 +1,11 @@ -use super::from_arrow::FromArrow; -use super::DaftMergeSketchAggable; -use crate::array::ops::GroupIndices; -use crate::{array::StructArray, datatypes::*}; use common_error::{DaftError, DaftResult}; +use super::{from_arrow::FromArrow, DaftMergeSketchAggable}; +use crate::{ + array::{ops::GroupIndices, StructArray}, + datatypes::*, +}; + impl DaftMergeSketchAggable for &StructArray { type Output = DaftResult; diff --git a/src/daft-core/src/array/ops/minhash.rs b/src/daft-core/src/array/ops/minhash.rs index 567268847a..edf7bb6a61 100644 --- a/src/daft-core/src/array/ops/minhash.rs +++ b/src/daft-core/src/array/ops/minhash.rs @@ -4,15 +4,13 @@ use arrow2::array::{MutableArray, MutablePrimitiveArray, PrimitiveArray}; use common_error::{DaftError, DaftResult}; use daft_minhash::load_simd; +use super::{as_arrow::AsArrow, DaftMinHash}; use crate::{ array::FixedSizeListArray, - datatypes::DataType, - datatypes::{Field, Utf8Array}, + datatypes::{DataType, Field, Utf8Array}, series::Series, }; -use super::{as_arrow::AsArrow, DaftMinHash}; - impl DaftMinHash for Utf8Array { type Output = DaftResult; diff --git a/src/daft-core/src/array/ops/mod.rs b/src/daft-core/src/array/ops/mod.rs index ba28710a52..97596e9dbc 100644 --- a/src/daft-core/src/array/ops/mod.rs +++ b/src/daft-core/src/array/ops/mod.rs @@ -57,13 +57,11 @@ pub mod trigonometry; mod truncate; mod utf8; +use common_error::DaftResult; pub use hll_sketch::HLL_SKETCH_DTYPE; pub use sort::{build_multi_array_bicompare, build_multi_array_compare}; - pub use utf8::{PadPlacement, Utf8NormalizeOptions}; -use common_error::DaftResult; - use crate::count_mode::CountMode; pub trait DaftCompare { diff --git a/src/daft-core/src/array/ops/null.rs b/src/daft-core/src/array/ops/null.rs index 4d589826ee..9e5d2efcc3 100644 --- a/src/daft-core/src/array/ops/null.rs +++ b/src/daft-core/src/array/ops/null.rs @@ -1,12 +1,12 @@ use std::{iter::repeat, sync::Arc}; +use common_error::DaftResult; + +use super::{DaftIsNull, DaftNotNull}; use crate::{ array::{ListArray, StructArray}, datatypes::*, }; -use common_error::DaftResult; - -use super::{DaftIsNull, DaftNotNull}; impl DataArray where diff --git a/src/daft-core/src/array/ops/pairwise.rs b/src/daft-core/src/array/ops/pairwise.rs index eb2e040b1c..35d9503553 100644 --- a/src/daft-core/src/array/ops/pairwise.rs +++ b/src/daft-core/src/array/ops/pairwise.rs @@ -1,10 +1,10 @@ +use common_error::DaftResult; + +use super::as_arrow::AsArrow; use crate::{ array::DataArray, datatypes::{BooleanArray, DaftNumericType, NullArray, UInt64Type, Utf8Array}, }; -use common_error::DaftResult; - -use super::as_arrow::AsArrow; impl DataArray where diff --git a/src/daft-core/src/array/ops/repr.rs b/src/daft-core/src/array/ops/repr.rs index d01e5d72f4..b7467d70e2 100644 --- a/src/daft-core/src/array/ops/repr.rs +++ b/src/daft-core/src/array/ops/repr.rs @@ -1,21 +1,20 @@ use common_display::table_display::StrValue; +use common_error::DaftResult; use crate::{ array::{DataArray, FixedSizeListArray, ListArray, StructArray}, - datatypes::DataType, datatypes::{ logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, ImageArray, MapArray, TensorArray, TimeArray, TimestampArray, }, - BinaryArray, BooleanArray, DaftNumericType, ExtensionArray, FixedSizeBinaryArray, + BinaryArray, BooleanArray, DaftNumericType, DataType, ExtensionArray, FixedSizeBinaryArray, NullArray, UInt64Array, Utf8Array, }, series::Series, utils::display::{display_date32, display_decimal128, display_time64, display_timestamp}, with_match_daft_types, }; -use common_error::DaftResult; // Default implementation of str_value: format the value with the given format string. macro_rules! impl_array_str_value { diff --git a/src/daft-core/src/array/ops/round.rs b/src/daft-core/src/array/ops/round.rs index 4d35402005..b616f18211 100644 --- a/src/daft-core/src/array/ops/round.rs +++ b/src/daft-core/src/array/ops/round.rs @@ -1,9 +1,8 @@ +use common_error::DaftResult; use num_traits::Pow; use crate::datatypes::{Float32Array, Float64Array}; -use common_error::DaftResult; - impl Float32Array { pub fn round(&self, decimal: i32) -> DaftResult { if decimal == 0 { diff --git a/src/daft-core/src/array/ops/search_sorted.rs b/src/daft-core/src/array/ops/search_sorted.rs index 8abd7a99d4..bfd8d75824 100644 --- a/src/daft-core/src/array/ops/search_sorted.rs +++ b/src/daft-core/src/array/ops/search_sorted.rs @@ -1,9 +1,10 @@ +use common_error::DaftResult; + use crate::{ array::DataArray, datatypes::{DaftArrowBackedType, UInt64Array}, kernels::search_sorted, }; -use common_error::DaftResult; impl DataArray where diff --git a/src/daft-core/src/array/ops/sign.rs b/src/daft-core/src/array/ops/sign.rs index c3be34bb92..1549cae790 100644 --- a/src/daft-core/src/array/ops/sign.rs +++ b/src/daft-core/src/array/ops/sign.rs @@ -1,9 +1,7 @@ -use crate::{array::DataArray, datatypes::DaftNumericType}; -use num_traits::Signed; -use num_traits::Unsigned; -use num_traits::{One, Zero}; - use common_error::DaftResult; +use num_traits::{One, Signed, Unsigned, Zero}; + +use crate::{array::DataArray, datatypes::DaftNumericType}; impl DataArray where diff --git a/src/daft-core/src/array/ops/sketch_percentile.rs b/src/daft-core/src/array/ops/sketch_percentile.rs index 1fd36ba9f4..3f4f05a51b 100644 --- a/src/daft-core/src/array/ops/sketch_percentile.rs +++ b/src/daft-core/src/array/ops/sketch_percentile.rs @@ -1,17 +1,14 @@ use std::sync::Arc; -use crate::{ - array::{FixedSizeListArray, StructArray}, - datatypes::DataType, - datatypes::{Field, Float64Array}, - series::IntoSeries, - series::Series, -}; - use arrow2::array::{MutablePrimitiveArray, PrimitiveArray}; use common_error::DaftResult; use super::from_arrow::FromArrow; +use crate::{ + array::{FixedSizeListArray, StructArray}, + datatypes::{DataType, Field, Float64Array}, + series::{IntoSeries, Series}, +}; impl StructArray { pub fn sketch_percentile( diff --git a/src/daft-core/src/array/ops/sort.rs b/src/daft-core/src/array/ops/sort.rs index 1c85e48daf..ed7e818578 100644 --- a/src/daft-core/src/array/ops/sort.rs +++ b/src/daft-core/src/array/ops/sort.rs @@ -1,3 +1,12 @@ +use arrow2::{ + array::ord::{self, DynComparator}, + types::Index, +}; +use common_error::DaftResult; + +use super::{arrow2::sort::primitive::common::multi_column_idx_sort, as_arrow::AsArrow}; +#[cfg(feature = "python")] +use crate::datatypes::PythonArray; use crate::{ array::{DataArray, FixedSizeListArray, ListArray, StructArray}, datatypes::{ @@ -11,19 +20,6 @@ use crate::{ kernels::search_sorted::{build_compare_with_nulls, cmp_float}, series::Series, }; -use common_error::DaftResult; - -#[cfg(feature = "python")] -use crate::datatypes::PythonArray; - -use arrow2::{ - array::ord::{self, DynComparator}, - types::Index, -}; - -use super::arrow2::sort::primitive::common::multi_column_idx_sort; - -use super::as_arrow::AsArrow; pub fn build_multi_array_compare( arrays: &[Series], diff --git a/src/daft-core/src/array/ops/sqrt.rs b/src/daft-core/src/array/ops/sqrt.rs index 903bbfb12e..5cd91311ac 100644 --- a/src/daft-core/src/array/ops/sqrt.rs +++ b/src/daft-core/src/array/ops/sqrt.rs @@ -1,6 +1,5 @@ -use num_traits::Float; - use common_error::DaftResult; +use num_traits::Float; use crate::{array::DataArray, datatypes::DaftNumericType}; diff --git a/src/daft-core/src/array/ops/sum.rs b/src/daft-core/src/array/ops/sum.rs index 6f32759218..90c4e512a6 100644 --- a/src/daft-core/src/array/ops/sum.rs +++ b/src/daft-core/src/array/ops/sum.rs @@ -1,13 +1,8 @@ -use super::DaftSumAggable; - -use super::as_arrow::AsArrow; - -use crate::array::ops::GroupIndices; -use crate::datatypes::*; - +use arrow2::array::Array; use common_error::DaftResult; -use arrow2::array::Array; +use super::{as_arrow::AsArrow, DaftSumAggable}; +use crate::{array::ops::GroupIndices, datatypes::*}; macro_rules! impl_daft_numeric_agg { ($T:ident, $AggType: ident) => { impl DaftSumAggable for &DataArray<$T> { diff --git a/src/daft-core/src/array/ops/take.rs b/src/daft-core/src/array/ops/take.rs index e74b27c5dd..c45ad3ea56 100644 --- a/src/daft-core/src/array/ops/take.rs +++ b/src/daft-core/src/array/ops/take.rs @@ -1,12 +1,14 @@ -use crate::{ - array::growable::{Growable, GrowableArray}, - array::prelude::*, - datatypes::prelude::*, -}; use arrow2::types::Index; use common_error::DaftResult; use super::as_arrow::AsArrow; +use crate::{ + array::{ + growable::{Growable, GrowableArray}, + prelude::*, + }, + datatypes::prelude::*, +}; impl DataArray where @@ -109,12 +111,11 @@ impl crate::datatypes::PythonArray { I: DaftIntegerType, ::Native: arrow2::types::Index, { - use crate::array::pseudo_arrow::PseudoArrowArray; - use crate::datatypes::PythonType; - use arrow2::array::Array; use pyo3::prelude::*; + use crate::{array::pseudo_arrow::PseudoArrowArray, datatypes::PythonType}; + let indices = idx.as_arrow(); let old_values = self.as_arrow().values(); diff --git a/src/daft-core/src/array/ops/time.rs b/src/daft-core/src/array/ops/time.rs index 5296197bbb..9f0fcd4a61 100644 --- a/src/daft-core/src/array/ops/time.rs +++ b/src/daft-core/src/array/ops/time.rs @@ -1,11 +1,9 @@ -use crate::array::prelude::*; -use crate::datatypes::prelude::*; - use arrow2::{array::PrimitiveArray, compute::arithmetics::ArraySub}; use chrono::{Duration, NaiveDate, NaiveTime, Timelike}; use common_error::{DaftError, DaftResult}; use super::as_arrow::AsArrow; +use crate::{array::prelude::*, datatypes::prelude::*}; fn process_interval(interval: &str, timeunit: TimeUnit) -> DaftResult { let (count_str, unit) = interval.split_once(' ').ok_or_else(|| { diff --git a/src/daft-core/src/array/ops/trigonometry.rs b/src/daft-core/src/array/ops/trigonometry.rs index 7bc45bab92..659e11e39e 100644 --- a/src/daft-core/src/array/ops/trigonometry.rs +++ b/src/daft-core/src/array/ops/trigonometry.rs @@ -1,12 +1,12 @@ +use common_error::DaftResult; use num_traits::Float; use serde::{Deserialize, Serialize}; -use common_error::DaftResult; - -use crate::array::DataArray; -use crate::datatypes::{DaftFloatType, Float32Array, Float64Array}; - use super::DaftAtan2; +use crate::{ + array::DataArray, + datatypes::{DaftFloatType, Float32Array, Float64Array}, +}; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum TrigonometricFunction { diff --git a/src/daft-core/src/array/ops/truncate.rs b/src/daft-core/src/array/ops/truncate.rs index df0116dcca..3c1eff33b6 100644 --- a/src/daft-core/src/array/ops/truncate.rs +++ b/src/daft-core/src/array/ops/truncate.rs @@ -3,6 +3,7 @@ use std::ops::Rem; use common_error::DaftResult; use num_traits::ToPrimitive; +use super::as_arrow::AsArrow; use crate::{ array::DataArray, datatypes::{ @@ -11,8 +12,6 @@ use crate::{ }, }; -use super::as_arrow::AsArrow; - macro_rules! impl_int_truncate { ($DT:ty) => { impl DataArray<$DT> { diff --git a/src/daft-core/src/array/ops/utf8.rs b/src/daft-core/src/array/ops/utf8.rs index a753b9f208..a8e80623c1 100644 --- a/src/daft-core/src/array/ops/utf8.rs +++ b/src/daft-core/src/array/ops/utf8.rs @@ -3,7 +3,6 @@ use std::{ iter::{self, Repeat, Take}, }; -use crate::{array::prelude::*, datatypes::prelude::*, series::Series}; use aho_corasick::{AhoCorasickBuilder, MatchKind}; use arrow2::{array::Array, temporal_conversions}; use chrono::Datelike; @@ -14,6 +13,7 @@ use serde::{Deserialize, Serialize}; use unicode_normalization::{is_nfd_quick, IsNormalized, UnicodeNormalization}; use super::{as_arrow::AsArrow, full::FullNull}; +use crate::{array::prelude::*, datatypes::prelude::*, series::Series}; enum BroadcastedStrIter<'a> { Repeat(std::iter::Take>>), diff --git a/src/daft-core/src/array/prelude.rs b/src/daft-core/src/array/prelude.rs index 20044c02f2..fdcc8fda72 100644 --- a/src/daft-core/src/array/prelude.rs +++ b/src/daft-core/src/array/prelude.rs @@ -1,18 +1,17 @@ -pub use crate::datatypes::{ - BinaryArray, BooleanArray, ExtensionArray, FixedSizeBinaryArray, Float32Array, Float64Array, - Int128Array, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, UInt16Array, - UInt32Array, UInt64Array, UInt8Array, Utf8Array, -}; - pub use super::{DataArray, FixedSizeListArray, ListArray, StructArray}; - // Import logical array types pub use crate::datatypes::logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, ImageArray, LogicalArray, MapArray, TensorArray, TimeArray, TimestampArray, }; - -pub use crate::array::ops::{ - as_arrow::AsArrow, from_arrow::FromArrow, full::FullNull, DaftCompare, DaftLogical, +pub use crate::{ + array::ops::{ + as_arrow::AsArrow, from_arrow::FromArrow, full::FullNull, DaftCompare, DaftLogical, + }, + datatypes::{ + BinaryArray, BooleanArray, ExtensionArray, FixedSizeBinaryArray, Float32Array, + Float64Array, Int128Array, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array, + }, }; diff --git a/src/daft-core/src/array/pseudo_arrow/compute.rs b/src/daft-core/src/array/pseudo_arrow/compute.rs index e56bdafffe..8f513a5857 100644 --- a/src/daft-core/src/array/pseudo_arrow/compute.rs +++ b/src/daft-core/src/array/pseudo_arrow/compute.rs @@ -1,8 +1,8 @@ use std::ops::Not; +use arrow2::{array::Array, bitmap::Bitmap}; + use crate::array::pseudo_arrow::PseudoArrowArray; -use arrow2::array::Array; -use arrow2::bitmap::Bitmap; impl PseudoArrowArray { pub fn concatenate(arrays: Vec<&Self>) -> Self { diff --git a/src/daft-core/src/array/pseudo_arrow/mod.rs b/src/daft-core/src/array/pseudo_arrow/mod.rs index 1186f48370..d54abaac7d 100644 --- a/src/daft-core/src/array/pseudo_arrow/mod.rs +++ b/src/daft-core/src/array/pseudo_arrow/mod.rs @@ -206,7 +206,6 @@ use arrow2::{ buffer::Buffer, datatypes::DataType, }; - use common_error::{DaftError, DaftResult}; pub mod compute; diff --git a/src/daft-core/src/array/pseudo_arrow/python.rs b/src/daft-core/src/array/pseudo_arrow/python.rs index df658824a6..0beb5fd950 100644 --- a/src/daft-core/src/array/pseudo_arrow/python.rs +++ b/src/daft-core/src/array/pseudo_arrow/python.rs @@ -1,9 +1,8 @@ -use crate::array::pseudo_arrow::PseudoArrowArray; -use arrow2::array::Array; -use arrow2::bitmap::Bitmap; - +use arrow2::{array::Array, bitmap::Bitmap}; use pyo3::prelude::*; +use crate::array::pseudo_arrow::PseudoArrowArray; + impl PseudoArrowArray { pub fn from_pyobj_vec(pyobj_vec: Vec) -> Self { // Converts this Vec into a PseudoArrowArray. diff --git a/src/daft-core/src/array/serdes.rs b/src/daft-core/src/array/serdes.rs index 2a7d9ff7a4..9ba3905680 100644 --- a/src/daft-core/src/array/serdes.rs +++ b/src/daft-core/src/array/serdes.rs @@ -2,6 +2,9 @@ use std::cell::RefCell; use serde::ser::SerializeMap; +use super::{ops::as_arrow::AsArrow, DataArray, FixedSizeListArray, ListArray, StructArray}; +#[cfg(feature = "python")] +use crate::datatypes::PythonArray; use crate::{ datatypes::{ logical::LogicalArray, BinaryArray, BooleanArray, DaftLogicalType, DaftNumericType, @@ -10,11 +13,6 @@ use crate::{ series::{IntoSeries, Series}, }; -#[cfg(feature = "python")] -use crate::datatypes::PythonArray; - -use super::{ops::as_arrow::AsArrow, DataArray, FixedSizeListArray, ListArray, StructArray}; - pub struct IterSer where I: IntoIterator, diff --git a/src/daft-core/src/array/struct_array.rs b/src/daft-core/src/array/struct_array.rs index 92ac656bfc..fb0c50fb25 100644 --- a/src/daft-core/src/array/struct_array.rs +++ b/src/daft-core/src/array/struct_array.rs @@ -2,10 +2,11 @@ use std::sync::Arc; use common_error::{DaftError, DaftResult}; -use crate::array::growable::{Growable, GrowableArray}; -use crate::datatypes::DataType; -use crate::datatypes::{DaftArrayType, Field}; -use crate::series::Series; +use crate::{ + array::growable::{Growable, GrowableArray}, + datatypes::{DaftArrayType, DataType, Field}, + series::Series, +}; #[derive(Clone, Debug)] pub struct StructArray { diff --git a/src/daft-core/src/count_mode.rs b/src/daft-core/src/count_mode.rs index fd7343a564..7ef22f452f 100644 --- a/src/daft-core/src/count_mode.rs +++ b/src/daft-core/src/count_mode.rs @@ -1,12 +1,11 @@ +use std::str::FromStr; + +use common_error::{DaftError, DaftResult}; use common_py_serde::impl_bincode_py_state_serialization; +use derive_more::Display; #[cfg(feature = "python")] use pyo3::{exceptions::PyValueError, prelude::*}; use serde::{Deserialize, Serialize}; -use std::str::FromStr; - -use derive_more::Display; - -use common_error::{DaftError, DaftResult}; /// Supported count modes for Daft's count aggregation. /// diff --git a/src/daft-core/src/datatypes/infer_datatype.rs b/src/daft-core/src/datatypes/infer_datatype.rs index 51679381a5..9c05eb0b02 100644 --- a/src/daft-core/src/datatypes/infer_datatype.rs +++ b/src/daft-core/src/datatypes/infer_datatype.rs @@ -5,9 +5,8 @@ use std::{ use common_error::{DaftError, DaftResult}; -use crate::utils::supertype::try_get_supertype; - use super::DataType; +use crate::utils::supertype::try_get_supertype; // This is a stopgap to keep this logic separated from the DataTypes themselves // Once we convert daft-dsl to a root level crate, this logic should move there diff --git a/src/daft-core/src/datatypes/logical.rs b/src/daft-core/src/datatypes/logical.rs index dddc56ebc2..d61963ac69 100644 --- a/src/daft-core/src/datatypes/logical.rs +++ b/src/daft-core/src/datatypes/logical.rs @@ -1,10 +1,5 @@ use std::{marker::PhantomData, sync::Arc}; -use crate::{ - array::{ListArray, StructArray}, - datatypes::{DaftLogicalType, DateType, Field}, - with_match_daft_logical_primitive_types, -}; use common_error::DaftResult; use super::{ @@ -12,6 +7,11 @@ use super::{ FixedShapeImageType, FixedShapeTensorType, FixedSizeListArray, ImageType, MapType, TensorType, TimeType, TimestampType, }; +use crate::{ + array::{ListArray, StructArray}, + datatypes::{DaftLogicalType, DateType, Field}, + with_match_daft_logical_primitive_types, +}; /// A LogicalArray is a wrapper on top of some underlying array, applying the semantic meaning of its /// field.datatype() to the underlying array. diff --git a/src/daft-core/src/datatypes/mod.rs b/src/daft-core/src/datatypes/mod.rs index 952455f38b..171f8fc957 100644 --- a/src/daft-core/src/datatypes/mod.rs +++ b/src/daft-core/src/datatypes/mod.rs @@ -4,26 +4,27 @@ mod matching; pub use infer_datatype::InferDataType; pub mod prelude; -use crate::array::{ops::as_arrow::AsArrow, ListArray, StructArray}; -pub use crate::array::{DataArray, FixedSizeListArray}; +use std::ops::{Add, Div, Mul, Rem, Sub}; + pub use agg_ops::{try_mean_supertype, try_sum_supertype}; use arrow2::{ compute::comparison::Simd8, types::{simd::Simd, NativeType}, }; +// Import DataType enum +pub use daft_schema::dtype::DataType; +pub use daft_schema::{ + field::{Field, FieldID, FieldRef}, + image_format::ImageFormat, + image_mode::ImageMode, + time_unit::{infer_timeunit_from_format_string, TimeUnit}, +}; pub use infer_datatype::try_physical_supertype; use num_traits::{Bounded, Float, FromPrimitive, Num, NumCast, ToPrimitive, Zero}; use serde::Serialize; -use std::ops::{Add, Div, Mul, Rem, Sub}; - -pub use daft_schema::field::{Field, FieldID, FieldRef}; -pub use daft_schema::image_format::ImageFormat; -pub use daft_schema::image_mode::ImageMode; -pub use daft_schema::time_unit::{infer_timeunit_from_format_string, TimeUnit}; - -// Import DataType enum -pub use daft_schema::dtype::DataType; +use crate::array::{ops::as_arrow::AsArrow, ListArray, StructArray}; +pub use crate::array::{DataArray, FixedSizeListArray}; pub mod logical; diff --git a/src/daft-core/src/datatypes/prelude.rs b/src/daft-core/src/datatypes/prelude.rs index 64c8021e9b..6574bbe7d9 100644 --- a/src/daft-core/src/datatypes/prelude.rs +++ b/src/daft-core/src/datatypes/prelude.rs @@ -1,36 +1,29 @@ // Import basic array types and their corresponding data types -pub use super::{ - BinaryType, BooleanType, ExtensionType, FixedSizeBinaryType, FixedSizeListType, Float32Type, - Float64Type, Int128Type, Int16Type, Int32Type, Int64Type, Int8Type, NullType, UInt16Type, - UInt32Type, UInt64Type, UInt8Type, Utf8Type, -}; - -// Import utility types and structs - -pub use daft_schema::field::{Field, FieldID, FieldRef}; - -pub use daft_schema::image_format::ImageFormat; -pub use daft_schema::image_mode::ImageMode; -pub use daft_schema::time_unit::TimeUnit; - // Import DataType enum -pub use daft_schema::dtype::DataType; +// Import utility types and structs +pub use daft_schema::{ + dtype::DataType, + field::{Field, FieldID, FieldRef}, + image_format::ImageFormat, + image_mode::ImageMode, + time_unit::TimeUnit, +}; // Conditionally import PythonArray if the 'python' feature is enabled #[cfg(feature = "python")] pub use super::PythonArray; - +pub use super::{ + BinaryType, BooleanType, DaftArrayType, ExtensionType, FixedSizeBinaryType, FixedSizeListType, + Float32Type, Float64Type, Int128Type, Int16Type, Int32Type, Int64Type, Int8Type, NullType, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, Utf8Type, +}; // Import trait definitions pub use super::{ DaftArrowBackedType, DaftDataType, DaftIntegerType, DaftListLikeType, DaftLogicalType, DaftNumericType, DaftPhysicalType, }; - pub use crate::datatypes::{ - DateType, Decimal128Type, DurationType, EmbeddingType, FixedShapeImageType, - FixedShapeTensorType, ImageType, MapType, TensorType, TimeType, TimestampType, + logical::DaftImageryType, DateType, Decimal128Type, DurationType, EmbeddingType, + FixedShapeImageType, FixedShapeTensorType, ImageType, MapType, TensorType, TimeType, + TimestampType, }; - -pub use crate::datatypes::logical::DaftImageryType; - -pub use super::DaftArrayType; diff --git a/src/daft-core/src/join.rs b/src/daft-core/src/join.rs index 365d141757..62746fbfb1 100644 --- a/src/daft-core/src/join.rs +++ b/src/daft-core/src/join.rs @@ -5,7 +5,6 @@ use common_py_serde::impl_bincode_py_state_serialization; use derive_more::Display; #[cfg(feature = "python")] use pyo3::{exceptions::PyValueError, pyclass, pymethods, PyObject, PyResult, Python}; - use serde::{Deserialize, Serialize}; /// Type of a join operation. diff --git a/src/daft-core/src/kernels/hashing.rs b/src/daft-core/src/kernels/hashing.rs index 293f4bc803..da15e90248 100644 --- a/src/daft-core/src/kernels/hashing.rs +++ b/src/daft-core/src/kernels/hashing.rs @@ -7,9 +7,10 @@ use arrow2::{ error::{Error, Result}, types::{NativeType, Offset}, }; - -use xxhash_rust::const_xxh3; -use xxhash_rust::xxh3::{xxh3_64, xxh3_64_with_seed}; +use xxhash_rust::{ + const_xxh3, + xxh3::{xxh3_64, xxh3_64_with_seed}, +}; fn hash_primitive( array: &PrimitiveArray, diff --git a/src/daft-core/src/kernels/utf8.rs b/src/daft-core/src/kernels/utf8.rs index 6ea04445e5..16d490229d 100644 --- a/src/daft-core/src/kernels/utf8.rs +++ b/src/daft-core/src/kernels/utf8.rs @@ -4,7 +4,6 @@ // **************************************************************************************** use arrow2::array::Utf8Array; - use common_error::{DaftError, DaftResult}; fn concat_strings(l: &str, r: &str) -> String { diff --git a/src/daft-core/src/prelude.rs b/src/daft-core/src/prelude.rs index 3bc5ed0f72..3b71045ddd 100644 --- a/src/daft-core/src/prelude.rs +++ b/src/daft-core/src/prelude.rs @@ -3,20 +3,18 @@ //! This module re-exports commonly used items from the Daft core library. // Re-export core series structures -pub use crate::series::{IntoSeries, Series}; - -// Re-export common data types and arrays -pub use crate::datatypes::prelude::*; - -pub use crate::array::prelude::*; +pub use daft_schema::schema::{Schema, SchemaRef}; // Re-export count mode enum pub use crate::count_mode::CountMode; - -pub use daft_schema::schema::{Schema, SchemaRef}; - +// Re-export common data types and arrays +pub use crate::datatypes::prelude::*; // Re-export join-related types pub use crate::join::{JoinStrategy, JoinType}; +pub use crate::{ + array::prelude::*, + series::{IntoSeries, Series}, +}; // You might want to include a glob import for users who want everything pub mod all { diff --git a/src/daft-core/src/python/mod.rs b/src/daft-core/src/python/mod.rs index 8134d7c41c..cb9ea8da12 100644 --- a/src/daft-core/src/python/mod.rs +++ b/src/daft-core/src/python/mod.rs @@ -1,9 +1,8 @@ use pyo3::prelude::*; pub mod series; -pub use series::PySeries; - pub use daft_schema::python::{field::PyField, schema::PySchema, PyDataType, PyTimeUnit}; +pub use series::PySeries; pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; diff --git a/src/daft-core/src/python/series.rs b/src/daft-core/src/python/series.rs index bc265d3c67..5764d5d610 100644 --- a/src/daft-core/src/python/series.rs +++ b/src/daft-core/src/python/series.rs @@ -1,5 +1,7 @@ use std::ops::{Add, Div, Mul, Rem, Sub}; +use common_arrow_ffi as ffi; +use daft_schema::python::PyDataType; use pyo3::{ exceptions::PyValueError, prelude::*, @@ -9,7 +11,10 @@ use pyo3::{ use crate::{ array::{ - ops::{DaftLogical, Utf8NormalizeOptions}, + ops::{ + as_arrow::AsArrow, trigonometry::TrigonometricFunction, DaftLogical, + Utf8NormalizeOptions, + }, pseudo_arrow::PseudoArrowArray, DataArray, }, @@ -19,12 +24,6 @@ use crate::{ utils::arrow::{cast_array_for_daft_if_needed, cast_array_from_daft_if_needed}, }; -use common_arrow_ffi as ffi; - -use crate::array::ops::as_arrow::AsArrow; -use crate::array::ops::trigonometry::TrigonometricFunction; -use daft_schema::python::PyDataType; - #[pyclass] #[derive(Clone)] pub struct PySeries { diff --git a/src/daft-core/src/series/array_impl/binary_ops.rs b/src/daft-core/src/series/array_impl/binary_ops.rs index 71190ed70a..15b2d9e273 100644 --- a/src/daft-core/src/series/array_impl/binary_ops.rs +++ b/src/daft-core/src/series/array_impl/binary_ops.rs @@ -2,31 +2,24 @@ use std::ops::{Add, Div, Mul, Rem, Sub}; use common_error::DaftResult; -use crate::datatypes::logical::{ - DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, - ImageArray, TensorArray, TimeArray, TimestampArray, -}; -use crate::datatypes::InferDataType; -use crate::datatypes::{ - BinaryArray, BooleanArray, ExtensionArray, Float32Array, Float64Array, Int16Array, Int32Array, - Int64Array, Int8Array, NullArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array, -}; +use super::{ArrayWrapper, IntoSeries, Series}; use crate::{ array::{ ops::{DaftCompare, DaftLogical}, FixedSizeListArray, ListArray, StructArray, }, - datatypes::DataType, datatypes::{ - logical::{Decimal128Array, MapArray}, - Field, FixedSizeBinaryArray, Int128Array, + logical::{ + DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, + FixedShapeTensorArray, ImageArray, MapArray, TensorArray, TimeArray, TimestampArray, + }, + BinaryArray, BooleanArray, DataType, ExtensionArray, Field, FixedSizeBinaryArray, + Float32Array, Float64Array, InferDataType, Int128Array, Int16Array, Int32Array, Int64Array, + Int8Array, NullArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array, }, series::series_like::SeriesLike, with_match_comparable_daft_types, with_match_integer_daft_types, with_match_numeric_daft_types, }; - -use super::{ArrayWrapper, IntoSeries, Series}; - #[cfg(feature = "python")] use crate::{datatypes::PythonArray, series::ops::py_binary_op_utilfn}; diff --git a/src/daft-core/src/series/array_impl/data_array.rs b/src/daft-core/src/series/array_impl/data_array.rs index 3fdaf527fc..f1cac0d31b 100644 --- a/src/daft-core/src/series/array_impl/data_array.rs +++ b/src/daft-core/src/series/array_impl/data_array.rs @@ -1,20 +1,20 @@ -use super::{ArrayWrapper, IntoSeries, Series}; use std::sync::Arc; -use crate::array::ops::broadcast::Broadcastable; -use crate::array::ops::DaftListAggable; -use crate::array::ops::GroupIndices; -use crate::array::DataArray; -use crate::datatypes::DaftArrowBackedType; +use common_error::DaftResult; -use crate::datatypes::FixedSizeBinaryArray; +use super::{ArrayWrapper, IntoSeries, Series}; #[cfg(feature = "python")] use crate::datatypes::PythonArray; -use crate::series::array_impl::binary_ops::SeriesBinaryOps; -use crate::{array::prelude::*, series::series_like::SeriesLike, with_match_integer_daft_types}; -use common_error::DaftResult; - -use crate::datatypes::DataType; +use crate::{ + array::{ + ops::{broadcast::Broadcastable, DaftListAggable, GroupIndices}, + prelude::*, + DataArray, + }, + datatypes::{DaftArrowBackedType, DataType, FixedSizeBinaryArray}, + series::{array_impl::binary_ops::SeriesBinaryOps, series_like::SeriesLike}, + with_match_integer_daft_types, +}; impl IntoSeries for DataArray where diff --git a/src/daft-core/src/series/array_impl/logical_array.rs b/src/daft-core/src/series/array_impl/logical_array.rs index 9b5ca9e5f6..aa978cf693 100644 --- a/src/daft-core/src/series/array_impl/logical_array.rs +++ b/src/daft-core/src/series/array_impl/logical_array.rs @@ -1,13 +1,12 @@ -use crate::array::prelude::*; -use crate::datatypes::prelude::*; +use std::sync::Arc; use super::{ArrayWrapper, IntoSeries, Series}; -use crate::array::ops::GroupIndices; -use crate::series::array_impl::binary_ops::SeriesBinaryOps; -use crate::series::DaftResult; -use crate::series::SeriesLike; -use crate::with_match_integer_daft_types; -use std::sync::Arc; +use crate::{ + array::{ops::GroupIndices, prelude::*}, + datatypes::prelude::*, + series::{array_impl::binary_ops::SeriesBinaryOps, DaftResult, SeriesLike}, + with_match_integer_daft_types, +}; impl IntoSeries for LogicalArray where diff --git a/src/daft-core/src/series/array_impl/nested_array.rs b/src/daft-core/src/series/array_impl/nested_array.rs index 9ec9939d11..5a6adb8b4c 100644 --- a/src/daft-core/src/series/array_impl/nested_array.rs +++ b/src/daft-core/src/series/array_impl/nested_array.rs @@ -2,15 +2,16 @@ use std::sync::Arc; use common_error::{DaftError, DaftResult}; -use crate::array::ops::broadcast::Broadcastable; -use crate::array::ops::{DaftIsNull, DaftNotNull, GroupIndices}; -use crate::array::{FixedSizeListArray, ListArray, StructArray}; -use crate::datatypes::BooleanArray; -use crate::datatypes::Field; -use crate::series::{array_impl::binary_ops::SeriesBinaryOps, IntoSeries, Series, SeriesLike}; -use crate::{datatypes::DataType, with_match_integer_daft_types}; - use super::ArrayWrapper; +use crate::{ + array::{ + ops::{broadcast::Broadcastable, DaftIsNull, DaftNotNull, GroupIndices}, + FixedSizeListArray, ListArray, StructArray, + }, + datatypes::{BooleanArray, DataType, Field}, + series::{array_impl::binary_ops::SeriesBinaryOps, IntoSeries, Series, SeriesLike}, + with_match_integer_daft_types, +}; macro_rules! impl_series_like_for_nested_arrays { ($da:ident) => { diff --git a/src/daft-core/src/series/from.rs b/src/daft-core/src/series/from.rs index f1a66fbc2b..99776edf64 100644 --- a/src/daft-core/src/series/from.rs +++ b/src/daft-core/src/series/from.rs @@ -1,15 +1,14 @@ use std::sync::Arc; +use common_error::{DaftError, DaftResult}; + +use super::Series; use crate::{ + array::ops::from_arrow::FromArrow, datatypes::{DataType, Field}, + series::array_impl::IntoSeries, with_match_daft_types, }; -use common_error::{DaftError, DaftResult}; - -use super::Series; - -use crate::array::ops::from_arrow::FromArrow; -use crate::series::array_impl::IntoSeries; impl Series { pub fn try_from_field_and_arrow_array( diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 7154bb42f1..92384296d5 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -3,9 +3,15 @@ mod from; mod ops; mod serdes; mod series_like; -use derive_more::Display; use std::sync::Arc; +pub use array_impl::IntoSeries; +use common_display::table_display::{make_comfy_table, StrValue}; +use common_error::DaftResult; +use derive_more::Display; +pub use ops::cast_series_to_supertype; + +pub(crate) use self::series_like::SeriesLike; use crate::{ array::{ ops::{from_arrow::FromArrow, full::FullNull, DaftCompare}, @@ -14,13 +20,6 @@ use crate::{ datatypes::{DaftDataType, DaftNumericType, DataType, Field, FieldRef, NumericNative}, with_match_daft_types, }; -use common_display::table_display::{make_comfy_table, StrValue}; -use common_error::DaftResult; - -pub use array_impl::IntoSeries; -pub use ops::cast_series_to_supertype; - -pub(crate) use self::series_like::SeriesLike; #[derive(Clone, Debug, Display)] #[display("{}\n", self.to_comfy_table())] diff --git a/src/daft-core/src/series/ops/abs.rs b/src/daft-core/src/series/ops/abs.rs index da3d092ca7..1ea9b47587 100644 --- a/src/daft-core/src/series/ops/abs.rs +++ b/src/daft-core/src/series/ops/abs.rs @@ -1,8 +1,9 @@ -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn abs(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/agg.rs b/src/daft-core/src/series/ops/agg.rs index 5042b57ce9..44a4c10348 100644 --- a/src/daft-core/src/series/ops/agg.rs +++ b/src/daft-core/src/series/ops/agg.rs @@ -1,13 +1,17 @@ -use crate::array::ops::DaftHllMergeAggable; -use crate::array::ListArray; -use crate::count_mode::CountMode; -use crate::series::IntoSeries; -use crate::{array::ops::GroupIndices, series::Series, with_match_physical_daft_types}; use arrow2::array::PrimitiveArray; use common_error::{DaftError, DaftResult}; use logical::Decimal128Array; -use crate::datatypes::*; +use crate::{ + array::{ + ops::{DaftHllMergeAggable, GroupIndices}, + ListArray, + }, + count_mode::CountMode, + datatypes::*, + series::{IntoSeries, Series}, + with_match_physical_daft_types, +}; impl Series { pub fn count(&self, groups: Option<&GroupIndices>, mode: CountMode) -> DaftResult { @@ -22,8 +26,7 @@ impl Series { } pub fn sum(&self, groups: Option<&GroupIndices>) -> DaftResult { - use crate::array::ops::DaftSumAggable; - use crate::datatypes::DataType::*; + use crate::{array::ops::DaftSumAggable, datatypes::DataType::*}; match self.data_type() { // intX -> int64 (in line with numpy) @@ -92,8 +95,7 @@ impl Series { } pub fn approx_sketch(&self, groups: Option<&GroupIndices>) -> DaftResult { - use crate::array::ops::DaftApproxSketchAggable; - use crate::datatypes::DataType::*; + use crate::{array::ops::DaftApproxSketchAggable, datatypes::DataType::*}; // Upcast all numeric types to float64 and compute approx_sketch. match self.data_type() { @@ -118,8 +120,7 @@ impl Series { } pub fn merge_sketch(&self, groups: Option<&GroupIndices>) -> DaftResult { - use crate::array::ops::DaftMergeSketchAggable; - use crate::datatypes::DataType::*; + use crate::{array::ops::DaftMergeSketchAggable, datatypes::DataType::*}; match self.data_type() { Struct(_) => match groups { @@ -148,8 +149,7 @@ impl Series { } pub fn mean(&self, groups: Option<&GroupIndices>) -> DaftResult { - use crate::array::ops::DaftMeanAggable; - use crate::datatypes::DataType::*; + use crate::{array::ops::DaftMeanAggable, datatypes::DataType::*}; // Upcast all numeric types to float64 and use f64 mean kernel. match self.data_type() { diff --git a/src/daft-core/src/series/ops/arithmetic.rs b/src/daft-core/src/series/ops/arithmetic.rs index 4f0c5f21ef..b49eba83d4 100644 --- a/src/daft-core/src/series/ops/arithmetic.rs +++ b/src/daft-core/src/series/ops/arithmetic.rs @@ -1,9 +1,9 @@ use std::ops::{Add, Div, Mul, Rem, Sub}; -use crate::series::Series; - use common_error::DaftResult; +use crate::series::Series; + macro_rules! impl_arithmetic_for_series { ($trait:ident, $op:ident) => { impl $trait for &Series { @@ -30,11 +30,14 @@ impl_arithmetic_for_series!(Rem, rem); #[cfg(test)] mod tests { - use crate::array::ops::full::FullNull; - use crate::datatypes::{DataType, Float64Array, Int64Array, Utf8Array}; - use crate::series::IntoSeries; use common_error::DaftResult; + use crate::{ + array::ops::full::FullNull, + datatypes::{DataType, Float64Array, Int64Array, Utf8Array}, + series::IntoSeries, + }; + #[test] fn add_int_and_int() -> DaftResult<()> { let a = Int64Array::from(("a", vec![1, 2, 3])); diff --git a/src/daft-core/src/series/ops/between.rs b/src/daft-core/src/series/ops/between.rs index c54e06f83f..55f9c2e283 100644 --- a/src/daft-core/src/series/ops/between.rs +++ b/src/daft-core/src/series/ops/between.rs @@ -1,5 +1,7 @@ use common_error::DaftResult; +#[cfg(feature = "python")] +use crate::series::ops::py_between_op_utilfn; use crate::{ array::ops::DaftBetween, datatypes::{BooleanArray, DataType, InferDataType}, @@ -7,9 +9,6 @@ use crate::{ with_match_numeric_daft_types, }; -#[cfg(feature = "python")] -use crate::series::ops::py_between_op_utilfn; - impl Series { pub fn between(&self, lower: &Series, upper: &Series) -> DaftResult { let (_output_type, _intermediate, lower_comp_type) = InferDataType::from(self.data_type()) @@ -53,10 +52,10 @@ impl Series { #[cfg(test)] mod tests { - use crate::{datatypes::DataType, series::Series}; - use common_error::DaftResult; + use crate::{datatypes::DataType, series::Series}; + #[test] fn test_between_all_null() -> DaftResult<()> { let value = Series::full_null("value", &DataType::Null, 2); diff --git a/src/daft-core/src/series/ops/broadcast.rs b/src/daft-core/src/series/ops/broadcast.rs index 7c3c0fbe90..809364c4b8 100644 --- a/src/daft-core/src/series/ops/broadcast.rs +++ b/src/daft-core/src/series/ops/broadcast.rs @@ -1,6 +1,6 @@ -use crate::series::Series; - use common_error::DaftResult; + +use crate::series::Series; impl Series { pub fn broadcast(&self, num: usize) -> DaftResult { self.inner.broadcast(num) @@ -9,11 +9,14 @@ impl Series { #[cfg(test)] mod tests { - use crate::array::ops::full::FullNull; - use crate::datatypes::{DataType, Int64Array, Utf8Array}; - use crate::series::array_impl::IntoSeries; use common_error::DaftResult; + use crate::{ + array::ops::full::FullNull, + datatypes::{DataType, Int64Array, Utf8Array}, + series::array_impl::IntoSeries, + }; + #[test] fn broadcast_int() -> DaftResult<()> { let a = Int64Array::from(("a", vec![1])).into_series(); diff --git a/src/daft-core/src/series/ops/cast.rs b/src/daft-core/src/series/ops/cast.rs index e45947b376..3d31edddf4 100644 --- a/src/daft-core/src/series/ops/cast.rs +++ b/src/daft-core/src/series/ops/cast.rs @@ -1,6 +1,7 @@ -use crate::{datatypes::DataType, series::Series}; use common_error::DaftResult; +use crate::{datatypes::DataType, series::Series}; + impl Series { pub fn cast(&self, datatype: &DataType) -> DaftResult { self.inner.cast(datatype) diff --git a/src/daft-core/src/series/ops/cbrt.rs b/src/daft-core/src/series/ops/cbrt.rs index 31e926b1b7..3d6cb2b8de 100644 --- a/src/daft-core/src/series/ops/cbrt.rs +++ b/src/daft-core/src/series/ops/cbrt.rs @@ -1,8 +1,9 @@ use common_error::DaftResult; -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn cbrt(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/ceil.rs b/src/daft-core/src/series/ops/ceil.rs index 17cb55beae..0097acc353 100644 --- a/src/daft-core/src/series/ops/ceil.rs +++ b/src/daft-core/src/series/ops/ceil.rs @@ -1,8 +1,9 @@ -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn ceil(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/comparison.rs b/src/daft-core/src/series/ops/comparison.rs index efdc7ca708..df411a6c9f 100644 --- a/src/daft-core/src/series/ops/comparison.rs +++ b/src/daft-core/src/series/ops/comparison.rs @@ -1,9 +1,10 @@ +use common_error::DaftResult; + use crate::{ array::ops::{DaftCompare, DaftLogical}, datatypes::BooleanArray, series::Series, }; -use common_error::DaftResult; macro_rules! call_inner { ($fname:ident) => { diff --git a/src/daft-core/src/series/ops/concat.rs b/src/daft-core/src/series/ops/concat.rs index 3341533099..c83038ef82 100644 --- a/src/daft-core/src/series/ops/concat.rs +++ b/src/daft-core/src/series/ops/concat.rs @@ -1,7 +1,10 @@ -use crate::series::{IntoSeries, Series}; -use crate::with_match_daft_types; use common_error::{DaftError, DaftResult}; +use crate::{ + series::{IntoSeries, Series}, + with_match_daft_types, +}; + impl Series { pub fn concat(series: &[&Series]) -> DaftResult { if series.is_empty() { diff --git a/src/daft-core/src/series/ops/downcast.rs b/src/daft-core/src/series/ops/downcast.rs index 8c85dbef39..3dd4bfe530 100644 --- a/src/daft-core/src/series/ops/downcast.rs +++ b/src/daft-core/src/series/ops/downcast.rs @@ -1,14 +1,15 @@ -use crate::array::{ListArray, StructArray}; -use crate::datatypes::logical::{ - DateArray, Decimal128Array, FixedShapeImageArray, TimeArray, TimestampArray, -}; -use crate::datatypes::*; -use crate::series::array_impl::ArrayWrapper; -use crate::series::Series; use common_error::DaftResult; use logical::{EmbeddingArray, FixedShapeTensorArray, TensorArray}; use self::logical::{DurationArray, ImageArray, MapArray}; +use crate::{ + array::{ListArray, StructArray}, + datatypes::{ + logical::{DateArray, Decimal128Array, FixedShapeImageArray, TimeArray, TimestampArray}, + *, + }, + series::{array_impl::ArrayWrapper, Series}, +}; impl Series { pub fn downcast(&self) -> DaftResult<&Arr> { diff --git a/src/daft-core/src/series/ops/exp.rs b/src/daft-core/src/series/ops/exp.rs index f52c44ff2d..95a43fc3c2 100644 --- a/src/daft-core/src/series/ops/exp.rs +++ b/src/daft-core/src/series/ops/exp.rs @@ -1,9 +1,9 @@ -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn exp(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/filter.rs b/src/daft-core/src/series/ops/filter.rs index 327a209541..06488ac92e 100644 --- a/src/daft-core/src/series/ops/filter.rs +++ b/src/daft-core/src/series/ops/filter.rs @@ -1,6 +1,7 @@ -use crate::{datatypes::BooleanArray, series::Series}; use common_error::{DaftError, DaftResult}; +use crate::{datatypes::BooleanArray, series::Series}; + impl Series { pub fn filter(&self, mask: &BooleanArray) -> DaftResult { match (self.len(), mask.len()) { diff --git a/src/daft-core/src/series/ops/float.rs b/src/daft-core/src/series/ops/float.rs index 05af1c7a45..92fc9c48b7 100644 --- a/src/daft-core/src/series/ops/float.rs +++ b/src/daft-core/src/series/ops/float.rs @@ -1,8 +1,9 @@ -use crate::{series::Series, with_match_float_and_null_daft_types}; - use common_error::DaftResult; -use crate::series::array_impl::IntoSeries; +use crate::{ + series::{array_impl::IntoSeries, Series}, + with_match_float_and_null_daft_types, +}; impl Series { pub fn is_nan(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/floor.rs b/src/daft-core/src/series/ops/floor.rs index 59ae87fcf1..c574c3da3e 100644 --- a/src/daft-core/src/series/ops/floor.rs +++ b/src/daft-core/src/series/ops/floor.rs @@ -1,8 +1,9 @@ -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn floor(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/groups.rs b/src/daft-core/src/series/ops/groups.rs index f2886e4876..db406e8d87 100644 --- a/src/daft-core/src/series/ops/groups.rs +++ b/src/daft-core/src/series/ops/groups.rs @@ -1,9 +1,10 @@ +use common_error::DaftResult; + use crate::{ array::ops::{GroupIndicesPair, IntoGroups}, series::Series, with_match_hashable_daft_types, }; -use common_error::DaftResult; impl IntoGroups for Series { fn make_groups(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/hash.rs b/src/daft-core/src/series/ops/hash.rs index 95bcbe90cf..5355353c62 100644 --- a/src/daft-core/src/series/ops/hash.rs +++ b/src/daft-core/src/series/ops/hash.rs @@ -1,10 +1,11 @@ +use arrow2::bitmap::Bitmap; +use common_error::DaftResult; + use crate::{ datatypes::{DataType, Int32Array, UInt64Array}, series::Series, with_match_hashable_daft_types, }; -use arrow2::bitmap::Bitmap; -use common_error::DaftResult; impl Series { pub fn hash(&self, seed: Option<&UInt64Array>) -> DaftResult { diff --git a/src/daft-core/src/series/ops/if_else.rs b/src/daft-core/src/series/ops/if_else.rs index e782f2a809..26cc553f54 100644 --- a/src/daft-core/src/series/ops/if_else.rs +++ b/src/daft-core/src/series/ops/if_else.rs @@ -1,8 +1,8 @@ +use common_error::DaftResult; + use super::cast_series_to_supertype; use crate::series::Series; -use common_error::DaftResult; - impl Series { pub fn if_else(&self, other: &Series, predicate: &Series) -> DaftResult { let casted_series = cast_series_to_supertype(&[self, other])?; diff --git a/src/daft-core/src/series/ops/is_in.rs b/src/daft-core/src/series/ops/is_in.rs index 264b080ce4..7acc601a37 100644 --- a/src/daft-core/src/series/ops/is_in.rs +++ b/src/daft-core/src/series/ops/is_in.rs @@ -1,5 +1,7 @@ use common_error::DaftResult; +#[cfg(feature = "python")] +use crate::series::ops::py_membership_op_utilfn; use crate::{ array::ops::DaftIsIn, datatypes::{BooleanArray, DataType, InferDataType}, @@ -7,9 +9,6 @@ use crate::{ with_match_comparable_daft_types, }; -#[cfg(feature = "python")] -use crate::series::ops::py_membership_op_utilfn; - fn default(name: &str, size: usize) -> DaftResult { Ok(BooleanArray::from((name, vec![false; size].as_slice())).into_series()) } diff --git a/src/daft-core/src/series/ops/len.rs b/src/daft-core/src/series/ops/len.rs index 4e40d3c3ca..dbbf56211e 100644 --- a/src/daft-core/src/series/ops/len.rs +++ b/src/daft-core/src/series/ops/len.rs @@ -1,6 +1,7 @@ -use crate::series::Series; use common_error::DaftResult; +use crate::series::Series; + impl Series { pub fn len(&self) -> usize { self.inner.len() diff --git a/src/daft-core/src/series/ops/list.rs b/src/daft-core/src/series/ops/list.rs index 5590e74ef8..d00f5440c2 100644 --- a/src/daft-core/src/series/ops/list.rs +++ b/src/daft-core/src/series/ops/list.rs @@ -1,10 +1,10 @@ -use crate::datatypes::{DataType, UInt64Array, Utf8Array}; -use crate::prelude::CountMode; -use crate::series::IntoSeries; -use crate::series::Series; -use common_error::DaftError; +use common_error::{DaftError, DaftResult}; -use common_error::DaftResult; +use crate::{ + datatypes::{DataType, UInt64Array, Utf8Array}, + prelude::CountMode, + series::{IntoSeries, Series}, +}; impl Series { pub fn explode(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/log.rs b/src/daft-core/src/series/ops/log.rs index cb22175df8..73ded27a88 100644 --- a/src/daft-core/src/series/ops/log.rs +++ b/src/daft-core/src/series/ops/log.rs @@ -1,8 +1,9 @@ -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn log2(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/map.rs b/src/daft-core/src/series/ops/map.rs index 436e2e7ab5..85461b1fe0 100644 --- a/src/daft-core/src/series/ops/map.rs +++ b/src/daft-core/src/series/ops/map.rs @@ -1,7 +1,6 @@ -use crate::datatypes::DataType; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{datatypes::DataType, series::Series}; impl Series { pub fn map_get(&self, key: &Series) -> DaftResult { diff --git a/src/daft-core/src/series/ops/mod.rs b/src/daft-core/src/series/ops/mod.rs index d527d500f4..59f04fcd6b 100644 --- a/src/daft-core/src/series/ops/mod.rs +++ b/src/daft-core/src/series/ops/mod.rs @@ -1,7 +1,7 @@ -use crate::utils::supertype::try_get_supertype; use common_error::DaftResult; use super::Series; +use crate::utils::supertype::try_get_supertype; pub mod abs; pub mod agg; @@ -57,10 +57,10 @@ pub fn cast_series_to_supertype(series: &[&Series]) -> DaftResult> { #[cfg(feature = "python")] macro_rules! py_binary_op_utilfn { ($lhs:expr, $rhs:expr, $pyoperator:expr, $utilfn:expr) => {{ - use crate::datatypes::DataType; - use crate::python::PySeries; use pyo3::prelude::*; + use crate::{datatypes::DataType, python::PySeries}; + let lhs = $lhs.cast(&DataType::Python)?; let rhs = $rhs.cast(&DataType::Python)?; @@ -99,10 +99,10 @@ pub(super) use py_binary_op_utilfn; #[cfg(feature = "python")] pub(super) fn py_membership_op_utilfn(lhs: &Series, rhs: &Series) -> DaftResult { - use crate::datatypes::DataType; - use crate::python::PySeries; use pyo3::prelude::*; + use crate::{datatypes::DataType, python::PySeries}; + let lhs_casted = lhs.cast(&DataType::Python)?; let rhs_casted = rhs.cast(&DataType::Python)?; @@ -136,10 +136,10 @@ pub(super) fn py_between_op_utilfn( lower: &Series, upper: &Series, ) -> DaftResult { - use crate::datatypes::DataType; - use crate::python::PySeries; use pyo3::prelude::*; + use crate::{datatypes::DataType, python::PySeries}; + let value_casted = value.cast(&DataType::Python)?; let lower_casted = lower.cast(&DataType::Python)?; let upper_casted = upper.cast(&DataType::Python)?; diff --git a/src/daft-core/src/series/ops/not.rs b/src/daft-core/src/series/ops/not.rs index 41638c5246..c6a8216614 100644 --- a/src/daft-core/src/series/ops/not.rs +++ b/src/daft-core/src/series/ops/not.rs @@ -1,10 +1,12 @@ use std::ops::Not; -use crate::datatypes::BooleanArray; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; use common_error::DaftResult; +use crate::{ + datatypes::BooleanArray, + series::{array_impl::IntoSeries, Series}, +}; + impl Not for &Series { type Output = DaftResult; fn not(self) -> Self::Output { diff --git a/src/daft-core/src/series/ops/null.rs b/src/daft-core/src/series/ops/null.rs index 685579f6c8..00df3b5860 100644 --- a/src/daft-core/src/series/ops/null.rs +++ b/src/daft-core/src/series/ops/null.rs @@ -1,7 +1,7 @@ -use crate::series::Series; - use common_error::DaftResult; +use crate::series::Series; + impl Series { pub fn is_null(&self) -> DaftResult { self.inner.is_null() diff --git a/src/daft-core/src/series/ops/partitioning.rs b/src/daft-core/src/series/ops/partitioning.rs index c9d81595ca..73b942ca00 100644 --- a/src/daft-core/src/series/ops/partitioning.rs +++ b/src/daft-core/src/series/ops/partitioning.rs @@ -1,11 +1,12 @@ -use crate::array::ops::as_arrow::AsArrow; -use crate::datatypes::logical::TimestampArray; -use crate::datatypes::{Int32Array, Int64Array, TimeUnit}; -use crate::series::array_impl::IntoSeries; -use crate::with_match_integer_daft_types; -use crate::{datatypes::DataType, series::Series}; use common_error::{DaftError, DaftResult}; +use crate::{ + array::ops::as_arrow::AsArrow, + datatypes::{logical::TimestampArray, DataType, Int32Array, Int64Array, TimeUnit}, + series::{array_impl::IntoSeries, Series}, + with_match_integer_daft_types, +}; + impl Series { pub fn partitioning_years(&self) -> DaftResult { let epoch_year = Int32Array::from(("1970", vec![1970])).into_series(); diff --git a/src/daft-core/src/series/ops/repeat.rs b/src/daft-core/src/series/ops/repeat.rs index 8b2c06518d..eddde7e463 100644 --- a/src/daft-core/src/series/ops/repeat.rs +++ b/src/daft-core/src/series/ops/repeat.rs @@ -1,8 +1,8 @@ -use crate::series::Series; - use common_error::DaftResult; use itertools::Itertools; +use crate::series::Series; + impl Series { pub(crate) fn repeat(&self, n: usize) -> DaftResult { let many_self = std::iter::repeat(self).take(n).collect_vec(); diff --git a/src/daft-core/src/series/ops/round.rs b/src/daft-core/src/series/ops/round.rs index a784e29638..6f968063fb 100644 --- a/src/daft-core/src/series/ops/round.rs +++ b/src/daft-core/src/series/ops/round.rs @@ -1,8 +1,9 @@ -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn round(&self, decimal: i32) -> DaftResult { diff --git a/src/daft-core/src/series/ops/search_sorted.rs b/src/daft-core/src/series/ops/search_sorted.rs index 59f2877b1e..7a2f2b3e2c 100644 --- a/src/daft-core/src/series/ops/search_sorted.rs +++ b/src/daft-core/src/series/ops/search_sorted.rs @@ -1,9 +1,10 @@ +use common_error::DaftResult; + use crate::{ datatypes::UInt64Array, series::{ops::cast_series_to_supertype, Series}, with_match_comparable_daft_types, }; -use common_error::DaftResult; impl Series { pub fn search_sorted(&self, keys: &Self, descending: bool) -> DaftResult { diff --git a/src/daft-core/src/series/ops/shift.rs b/src/daft-core/src/series/ops/shift.rs index d4ecf692d8..1ba5275ae5 100644 --- a/src/daft-core/src/series/ops/shift.rs +++ b/src/daft-core/src/series/ops/shift.rs @@ -1,7 +1,6 @@ -use crate::datatypes::DataType; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{datatypes::DataType, series::Series}; impl Series { pub fn shift_left(&self, bits: &Self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/sign.rs b/src/daft-core/src/series/ops/sign.rs index 6c93183a4a..53ecb67088 100644 --- a/src/daft-core/src/series/ops/sign.rs +++ b/src/daft-core/src/series/ops/sign.rs @@ -1,8 +1,9 @@ -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn sign(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/sketch_percentile.rs b/src/daft-core/src/series/ops/sketch_percentile.rs index c5f3dc4f7d..23e85b6d89 100644 --- a/src/daft-core/src/series/ops/sketch_percentile.rs +++ b/src/daft-core/src/series/ops/sketch_percentile.rs @@ -1,6 +1,6 @@ +use common_error::{DaftError, DaftResult}; + use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; impl Series { pub fn sketch_percentile( diff --git a/src/daft-core/src/series/ops/sort.rs b/src/daft-core/src/series/ops/sort.rs index da66c40e0b..4c591cc744 100644 --- a/src/daft-core/src/series/ops/sort.rs +++ b/src/daft-core/src/series/ops/sort.rs @@ -1,9 +1,9 @@ -use crate::{series::Series, with_match_comparable_daft_types}; -use common_error::DaftError; +use common_error::{DaftError, DaftResult}; -use common_error::DaftResult; - -use crate::series::array_impl::IntoSeries; +use crate::{ + series::{array_impl::IntoSeries, Series}, + with_match_comparable_daft_types, +}; impl Series { pub fn argsort(&self, descending: bool) -> DaftResult { diff --git a/src/daft-core/src/series/ops/sqrt.rs b/src/daft-core/src/series/ops/sqrt.rs index ca78c0764c..614f8f9fc2 100644 --- a/src/daft-core/src/series/ops/sqrt.rs +++ b/src/daft-core/src/series/ops/sqrt.rs @@ -1,8 +1,9 @@ use common_error::DaftResult; -use crate::datatypes::DataType; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; +use crate::{ + datatypes::DataType, + series::{array_impl::IntoSeries, Series}, +}; impl Series { pub fn sqrt(&self) -> DaftResult { diff --git a/src/daft-core/src/series/ops/struct_.rs b/src/daft-core/src/series/ops/struct_.rs index 01322c9ab3..4dabb8b0f3 100644 --- a/src/daft-core/src/series/ops/struct_.rs +++ b/src/daft-core/src/series/ops/struct_.rs @@ -1,7 +1,6 @@ -use crate::datatypes::DataType; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{datatypes::DataType, series::Series}; impl Series { pub fn struct_get(&self, name: &str) -> DaftResult { diff --git a/src/daft-core/src/series/ops/take.rs b/src/daft-core/src/series/ops/take.rs index c400a0e5a3..9ff218757e 100644 --- a/src/daft-core/src/series/ops/take.rs +++ b/src/daft-core/src/series/ops/take.rs @@ -1,12 +1,12 @@ +use arrow2::types::IndexRange; +use common_display::table_display::StrValue; +use common_error::DaftResult; + use crate::{ datatypes::Utf8Array, series::{IntoSeries, Series}, }; -use arrow2::types::IndexRange; -use common_display::table_display::StrValue; -use common_error::DaftResult; - impl Series { pub fn head(&self, num: usize) -> DaftResult { if num >= self.len() { diff --git a/src/daft-core/src/series/ops/time.rs b/src/daft-core/src/series/ops/time.rs index a87298f290..d66520eaa1 100644 --- a/src/daft-core/src/series/ops/time.rs +++ b/src/daft-core/src/series/ops/time.rs @@ -1,9 +1,10 @@ -use crate::datatypes::TimeUnit; -use crate::series::array_impl::IntoSeries; -use crate::{datatypes::DataType, series::Series}; - use common_error::{DaftError, DaftResult}; +use crate::{ + datatypes::{DataType, TimeUnit}, + series::{array_impl::IntoSeries, Series}, +}; + impl Series { pub fn dt_date(&self) -> DaftResult { match self.data_type() { diff --git a/src/daft-core/src/series/ops/trigonometry.rs b/src/daft-core/src/series/ops/trigonometry.rs index 9daf8ade25..25bac22548 100644 --- a/src/daft-core/src/series/ops/trigonometry.rs +++ b/src/daft-core/src/series/ops/trigonometry.rs @@ -1,10 +1,10 @@ -use crate::array::ops::trigonometry::TrigonometricFunction; -use crate::array::ops::DaftAtan2; -use crate::datatypes::DataType; -use crate::series::IntoSeries; -use crate::series::Series; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::{ + array::ops::{trigonometry::TrigonometricFunction, DaftAtan2}, + datatypes::DataType, + series::{IntoSeries, Series}, +}; impl Series { pub fn trigonometry(&self, trig_function: &TrigonometricFunction) -> DaftResult { diff --git a/src/daft-core/src/series/ops/utf8.rs b/src/daft-core/src/series/ops/utf8.rs index 4d307dfee1..d4fe19bde3 100644 --- a/src/daft-core/src/series/ops/utf8.rs +++ b/src/daft-core/src/series/ops/utf8.rs @@ -1,9 +1,12 @@ -use crate::array::ops::{PadPlacement, Utf8NormalizeOptions}; -use crate::series::array_impl::IntoSeries; -use crate::series::Series; -use crate::{datatypes::*, with_match_integer_daft_types}; use common_error::{DaftError, DaftResult}; +use crate::{ + array::ops::{PadPlacement, Utf8NormalizeOptions}, + datatypes::*, + series::{array_impl::IntoSeries, Series}, + with_match_integer_daft_types, +}; + impl Series { pub fn with_utf8_array( &self, diff --git a/src/daft-core/src/series/serdes.rs b/src/daft-core/src/series/serdes.rs index 9a550e11c2..ed1bb21927 100644 --- a/src/daft-core/src/series/serdes.rs +++ b/src/daft-core/src/series/serdes.rs @@ -3,16 +3,17 @@ use std::{borrow::Cow, sync::Arc}; use arrow2::offset::OffsetsBuffer; use serde::{de::Visitor, Deserializer}; -use crate::datatypes::*; - use crate::{ array::{ ops::{as_arrow::AsArrow, full::FullNull}, ListArray, StructArray, }, - datatypes::logical::{ - DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, MapArray, TensorArray, TimeArray, TimestampArray, + datatypes::{ + logical::{ + DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, + FixedShapeTensorArray, ImageArray, MapArray, TensorArray, TimeArray, TimestampArray, + }, + *, }, series::{IntoSeries, Series}, with_match_daft_types, diff --git a/src/daft-core/src/series/series_like.rs b/src/daft-core/src/series/series_like.rs index 10d75b7540..463892c8bd 100644 --- a/src/daft-core/src/series/series_like.rs +++ b/src/daft-core/src/series/series_like.rs @@ -1,12 +1,12 @@ use std::any::Any; +use common_error::DaftResult; + +use super::Series; use crate::{ array::ops::GroupIndices, datatypes::{BooleanArray, DataType, Field}, }; -use common_error::DaftResult; - -use super::Series; pub trait SeriesLike: Send + Sync + Any + std::fmt::Debug { #[allow(clippy::wrong_self_convention)] fn into_series(&self) -> Series; diff --git a/src/daft-core/src/utils/arrow.rs b/src/daft-core/src/utils/arrow.rs index a3690a837a..8e99be3897 100644 --- a/src/daft-core/src/utils/arrow.rs +++ b/src/daft-core/src/utils/arrow.rs @@ -1,8 +1,7 @@ -use lazy_static::lazy_static; -use std::collections::HashMap; -use std::sync::Mutex; +use std::{collections::HashMap, sync::Mutex}; use arrow2::compute::cast; +use lazy_static::lazy_static; // TODO(Clark): Refactor to GILOnceCell in order to avoid deadlock between the below mutex and the Python GIL. lazy_static! { diff --git a/src/daft-core/src/utils/display.rs b/src/daft-core/src/utils/display.rs index 7b1045f5fb..37593cda8a 100644 --- a/src/daft-core/src/utils/display.rs +++ b/src/daft-core/src/utils/display.rs @@ -1,7 +1,8 @@ -use crate::{datatypes::TimeUnit, series::Series}; use common_display::table_display::StrValue; use itertools::Itertools; +use crate::{datatypes::TimeUnit, series::Series}; + pub fn display_date32(val: i32) -> String { let epoch_date = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); let date = if val.is_positive() { diff --git a/src/daft-core/src/utils/dyn_compare.rs b/src/daft-core/src/utils/dyn_compare.rs index d9903da56f..e83f5bba4d 100644 --- a/src/daft-core/src/utils/dyn_compare.rs +++ b/src/daft-core/src/utils/dyn_compare.rs @@ -1,15 +1,13 @@ use std::cmp::Ordering; -use crate::datatypes::DataType; - +use arrow2::array::{ + dyn_ord::{build_dyn_array_compare, DynArrayComparator}, + Array, +}; +use common_error::{DaftError, DaftResult}; use daft_schema::schema::Schema; -use arrow2::array::Array; -use common_error::DaftError; -use common_error::DaftResult; - -use arrow2::array::dyn_ord::build_dyn_array_compare; -use arrow2::array::dyn_ord::DynArrayComparator; +use crate::datatypes::DataType; pub type MultiDynArrayComparator = Box], &[Box], usize, usize) -> Ordering + Send + Sync>; diff --git a/src/daft-core/src/utils/supertype.rs b/src/daft-core/src/utils/supertype.rs index 3133be89ad..0ee0d50966 100644 --- a/src/daft-core/src/utils/supertype.rs +++ b/src/daft-core/src/utils/supertype.rs @@ -1,7 +1,6 @@ -use crate::datatypes::DataType; -use crate::datatypes::TimeUnit; -use common_error::DaftError; -use common_error::DaftResult; +use common_error::{DaftError, DaftResult}; + +use crate::datatypes::{DataType, TimeUnit}; // TODO: Deprecate this logic soon! diff --git a/src/daft-csv/Cargo.toml b/src/daft-csv/Cargo.toml index 4e58223843..d672e30f35 100644 --- a/src/daft-csv/Cargo.toml +++ b/src/daft-csv/Cargo.toml @@ -1,7 +1,6 @@ [dependencies] arrow2 = {workspace = true, features = ["io_csv", "io_csv_async"]} async-compat = {workspace = true} -async-compression = {workspace = true} async-stream = {workspace = true} common-error = {path = "../common/error", default-features = false} common-py-serde = {path = "../common/py-serde", default-features = false} @@ -19,7 +18,6 @@ serde = {workspace = true} snafu = {workspace = true} tokio = {workspace = true} tokio-util = {workspace = true} -url = {workspace = true} [dev-dependencies] rstest = {workspace = true} diff --git a/src/daft-csv/src/compression.rs b/src/daft-csv/src/compression.rs deleted file mode 100644 index 268b1566d9..0000000000 --- a/src/daft-csv/src/compression.rs +++ /dev/null @@ -1,66 +0,0 @@ -use async_compression::tokio::bufread::{ - BrotliDecoder, BzDecoder, DeflateDecoder, GzipDecoder, LzmaDecoder, XzDecoder, ZlibDecoder, - ZstdDecoder, -}; -use std::{path::PathBuf, pin::Pin}; -use tokio::io::{AsyncBufRead, AsyncRead}; -use url::Url; - -#[derive(Debug)] -pub enum CompressionCodec { - Brotli, - Bz, - Deflate, - Gzip, - Lzma, - Xz, - Zlib, - Zstd, -} - -impl CompressionCodec { - pub fn from_uri(uri: &str) -> Option { - let url = Url::parse(uri); - let path = match &url { - Ok(url) => url.path(), - _ => uri, - }; - let extension = PathBuf::from(path) - .extension()? - .to_string_lossy() - .to_string(); - Self::from_extension(extension.as_ref()) - } - pub fn from_extension(extension: &str) -> Option { - use CompressionCodec::*; - match extension { - "br" => Some(Brotli), - "bz2" => Some(Bz), - "deflate" => Some(Deflate), - "gz" => Some(Gzip), - "lzma" => Some(Lzma), - "xz" => Some(Xz), - "zl" => Some(Zlib), - "zstd" | "zst" => Some(Zstd), - "snappy" => todo!("Snappy compression support not yet implemented"), - _ => None, - } - } - - pub fn to_decoder( - &self, - reader: T, - ) -> Pin> { - use CompressionCodec::*; - match self { - Brotli => Box::pin(BrotliDecoder::new(reader)), - Bz => Box::pin(BzDecoder::new(reader)), - Deflate => Box::pin(DeflateDecoder::new(reader)), - Gzip => Box::pin(GzipDecoder::new(reader)), - Lzma => Box::pin(LzmaDecoder::new(reader)), - Xz => Box::pin(XzDecoder::new(reader)), - Zlib => Box::pin(ZlibDecoder::new(reader)), - Zstd => Box::pin(ZstdDecoder::new(reader)), - } - } -} diff --git a/src/daft-csv/src/metadata.rs b/src/daft-csv/src/metadata.rs index 2625643b68..c8add38d96 100644 --- a/src/daft-csv/src/metadata.rs +++ b/src/daft-csv/src/metadata.rs @@ -4,7 +4,9 @@ use arrow2::io::csv::read_async::{AsyncReader, AsyncReaderBuilder}; use async_compat::CompatExt; use common_error::DaftResult; use csv_async::ByteRecord; +use daft_compression::CompressionCodec; use daft_core::prelude::Schema; +use daft_decoding::inference::infer; use daft_io::{get_runtime, GetResult, IOClient, IOStatsRef}; use futures::{StreamExt, TryStreamExt}; use snafu::ResultExt; @@ -15,8 +17,6 @@ use tokio::{ use tokio_util::io::StreamReader; use crate::{schema::merge_schema, CsvParseOptions}; -use daft_compression::CompressionCodec; -use daft_decoding::inference::infer; const DEFAULT_COLUMN_PREFIX: &str = "column_"; @@ -294,9 +294,8 @@ mod tests { use daft_io::{IOClient, IOConfig}; use rstest::rstest; - use crate::CsvParseOptions; - use super::read_csv_schema; + use crate::CsvParseOptions; #[rstest] fn test_csv_schema_local( diff --git a/src/daft-csv/src/read.rs b/src/daft-csv/src/read.rs index fed74d88e1..c0332feca8 100644 --- a/src/daft-csv/src/read.rs +++ b/src/daft-csv/src/read.rs @@ -7,8 +7,9 @@ use arrow2::{ use async_compat::{Compat, CompatExt}; use common_error::{DaftError, DaftResult}; use csv_async::AsyncReader; +use daft_compression::CompressionCodec; use daft_core::{prelude::*, utils::arrow::cast_array_for_daft_if_needed}; - +use daft_decoding::deserialize::deserialize_column; use daft_dsl::optimization::get_required_columns; use daft_io::{get_runtime, GetResult, IOClient, IOStatsRef}; use daft_table::Table; @@ -28,10 +29,10 @@ use tokio::{ }; use tokio_util::io::StreamReader; -use crate::ArrowSnafu; -use crate::{metadata::read_csv_schema_single, CsvConvertOptions, CsvParseOptions, CsvReadOptions}; -use daft_compression::CompressionCodec; -use daft_decoding::deserialize::deserialize_column; +use crate::{ + metadata::read_csv_schema_single, ArrowSnafu, CsvConvertOptions, CsvParseOptions, + CsvReadOptions, +}; trait ByteRecordChunkStream: Stream>> {} impl ByteRecordChunkStream for S where S: Stream>> {} @@ -663,24 +664,21 @@ fn fields_to_projection_indices( mod tests { use std::sync::Arc; - use common_error::{DaftError, DaftResult}; - use arrow2::io::csv::read::{ deserialize_batch, deserialize_column, infer, infer_schema, read_rows, ByteRecord, ReaderBuilder, }; + use common_error::{DaftError, DaftResult}; use daft_core::{ prelude::*, utils::arrow::{cast_array_for_daft_if_needed, cast_array_from_daft_if_needed}, }; - use daft_io::{IOClient, IOConfig}; use daft_table::Table; use rstest::rstest; - use crate::{char_to_byte, CsvConvertOptions, CsvParseOptions, CsvReadOptions}; - use super::read_csv; + use crate::{char_to_byte, CsvConvertOptions, CsvParseOptions, CsvReadOptions}; #[allow(clippy::too_many_arguments)] fn check_equal_local_arrow2( diff --git a/src/daft-dsl/src/arithmetic.rs b/src/daft-dsl/src/arithmetic.rs index bac4f30f44..95faa64074 100644 --- a/src/daft-dsl/src/arithmetic.rs +++ b/src/daft-dsl/src/arithmetic.rs @@ -1,5 +1,4 @@ -use crate::ExprRef; -use crate::{Expr, Operator}; +use crate::{Expr, ExprRef, Operator}; macro_rules! impl_expr_op { ($func_name:ident, $op_name: ident) => { @@ -25,9 +24,10 @@ impl_expr_op!(rem, Modulus); #[cfg(test)] mod tests { - use crate::{col, Expr}; use common_error::{DaftError, DaftResult}; + use crate::{col, Expr}; + #[test] fn check_add_expr_type() -> DaftResult<()> { let a = col("a"); diff --git a/src/daft-dsl/src/expr.rs b/src/daft-dsl/src/expr.rs index 0afbe81c2e..affb5f08e3 100644 --- a/src/daft-dsl/src/expr.rs +++ b/src/daft-dsl/src/expr.rs @@ -1,3 +1,9 @@ +use std::{ + io::{self, Write}, + sync::Arc, +}; + +use common_error::{DaftError, DaftResult}; use common_hashable_float_wrapper::FloatWrapper; use common_treenode::TreeNode; use daft_core::{ @@ -5,8 +11,11 @@ use daft_core::{ prelude::*, utils::supertype::try_get_supertype, }; +use derive_more::Display; use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use super::functions::FunctionExpr; use crate::{ functions::{ binary_op_display_without_formatter, function_display_without_formatter, @@ -21,17 +30,6 @@ use crate::{ optimization::{get_required_columns, requires_computation}, }; -use common_error::{DaftError, DaftResult}; - -use derive_more::Display; -use serde::{Deserialize, Serialize}; -use std::{ - io::{self, Write}, - sync::Arc, -}; - -use super::functions::FunctionExpr; - pub type ExprRef = Arc; #[derive(Display, Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] diff --git a/src/daft-dsl/src/functions/list/chunk.rs b/src/daft-dsl/src/functions/list/chunk.rs index 2d1676a24e..60f4567828 100644 --- a/src/daft-dsl/src/functions/list/chunk.rs +++ b/src/daft-dsl/src/functions/list/chunk.rs @@ -1,9 +1,8 @@ -use crate::ExprRef; +use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; use super::{super::FunctionEvaluator, ListExpr}; -use crate::functions::FunctionExpr; -use common_error::{DaftError, DaftResult}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ChunkEvaluator {} diff --git a/src/daft-dsl/src/functions/list/count.rs b/src/daft-dsl/src/functions/list/count.rs index 565a569720..8392e7353f 100644 --- a/src/daft-dsl/src/functions/list/count.rs +++ b/src/daft-dsl/src/functions/list/count.rs @@ -1,9 +1,8 @@ -use crate::{functions::FunctionExpr, ExprRef}; -use daft_core::prelude::*; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, ListExpr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct CountEvaluator {} diff --git a/src/daft-dsl/src/functions/list/explode.rs b/src/daft-dsl/src/functions/list/explode.rs index e3c9716b01..6065ec4486 100644 --- a/src/daft-dsl/src/functions/list/explode.rs +++ b/src/daft-dsl/src/functions/list/explode.rs @@ -1,9 +1,8 @@ -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ExplodeEvaluator {} diff --git a/src/daft-dsl/src/functions/list/get.rs b/src/daft-dsl/src/functions/list/get.rs index 69543bb5cd..3325603e13 100644 --- a/src/daft-dsl/src/functions/list/get.rs +++ b/src/daft-dsl/src/functions/list/get.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct GetEvaluator {} diff --git a/src/daft-dsl/src/functions/list/join.rs b/src/daft-dsl/src/functions/list/join.rs index 5bb2d8fdcb..af51ef886f 100644 --- a/src/daft-dsl/src/functions/list/join.rs +++ b/src/daft-dsl/src/functions/list/join.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct JoinEvaluator {} diff --git a/src/daft-dsl/src/functions/list/max.rs b/src/daft-dsl/src/functions/list/max.rs index b6658b4d67..f77031a25f 100644 --- a/src/daft-dsl/src/functions/list/max.rs +++ b/src/daft-dsl/src/functions/list/max.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct MaxEvaluator {} diff --git a/src/daft-dsl/src/functions/list/mean.rs b/src/daft-dsl/src/functions/list/mean.rs index 9498646d8f..9d7a64e050 100644 --- a/src/daft-dsl/src/functions/list/mean.rs +++ b/src/daft-dsl/src/functions/list/mean.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::{datatypes::try_mean_supertype, prelude::*}; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::{datatypes::try_mean_supertype, prelude::*}; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct MeanEvaluator {} diff --git a/src/daft-dsl/src/functions/list/min.rs b/src/daft-dsl/src/functions/list/min.rs index 41bda23146..14a073ab6b 100644 --- a/src/daft-dsl/src/functions/list/min.rs +++ b/src/daft-dsl/src/functions/list/min.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct MinEvaluator {} diff --git a/src/daft-dsl/src/functions/list/mod.rs b/src/daft-dsl/src/functions/list/mod.rs index f09cda26e8..4e1cdc8a9e 100644 --- a/src/daft-dsl/src/functions/list/mod.rs +++ b/src/daft-dsl/src/functions/list/mod.rs @@ -22,9 +22,8 @@ use serde::{Deserialize, Serialize}; use slice::SliceEvaluator; use sum::SumEvaluator; -use crate::{Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{Expr, ExprRef}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum ListExpr { diff --git a/src/daft-dsl/src/functions/list/slice.rs b/src/daft-dsl/src/functions/list/slice.rs index e03bc0a173..e56dd88aa9 100644 --- a/src/daft-dsl/src/functions/list/slice.rs +++ b/src/daft-dsl/src/functions/list/slice.rs @@ -1,9 +1,8 @@ -use crate::ExprRef; +use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; use super::super::FunctionEvaluator; -use crate::functions::FunctionExpr; -use common_error::{DaftError, DaftResult}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SliceEvaluator {} diff --git a/src/daft-dsl/src/functions/list/sum.rs b/src/daft-dsl/src/functions/list/sum.rs index 5e86d57a22..42710fa85f 100644 --- a/src/daft-dsl/src/functions/list/sum.rs +++ b/src/daft-dsl/src/functions/list/sum.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::{datatypes::try_sum_supertype, prelude::*}; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::{datatypes::try_sum_supertype, prelude::*}; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SumEvaluator {} diff --git a/src/daft-dsl/src/functions/map/get.rs b/src/daft-dsl/src/functions/map/get.rs index 08f83b966e..ab6eb148f8 100644 --- a/src/daft-dsl/src/functions/map/get.rs +++ b/src/daft-dsl/src/functions/map/get.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct GetEvaluator {} diff --git a/src/daft-dsl/src/functions/map/mod.rs b/src/daft-dsl/src/functions/map/mod.rs index 871e723c5e..979a6ccd1e 100644 --- a/src/daft-dsl/src/functions/map/mod.rs +++ b/src/daft-dsl/src/functions/map/mod.rs @@ -3,9 +3,8 @@ mod get; use get::GetEvaluator; use serde::{Deserialize, Serialize}; -use crate::{Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{Expr, ExprRef}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum MapExpr { diff --git a/src/daft-dsl/src/functions/mod.rs b/src/daft-dsl/src/functions/mod.rs index e92c0621f2..f0f64e0f63 100644 --- a/src/daft-dsl/src/functions/mod.rs +++ b/src/daft-dsl/src/functions/mod.rs @@ -8,27 +8,22 @@ pub mod struct_; pub mod temporal; pub mod utf8; -use std::fmt::Write; -use std::fmt::{Display, Formatter, Result}; -use std::hash::Hash; - -use crate::{Expr, ExprRef, Operator}; - -use self::list::ListExpr; -use self::map::MapExpr; -use self::numeric::NumericExpr; -use self::partitioning::PartitioningExpr; -use self::sketch::SketchExpr; -use self::struct_::StructExpr; -use self::temporal::TemporalExpr; -use self::utf8::Utf8Expr; -pub use scalar::*; +use std::{ + fmt::{Display, Formatter, Result, Write}, + hash::Hash, +}; use common_error::DaftResult; use daft_core::prelude::*; - +pub use scalar::*; use serde::{Deserialize, Serialize}; +use self::{ + list::ListExpr, map::MapExpr, numeric::NumericExpr, partitioning::PartitioningExpr, + sketch::SketchExpr, struct_::StructExpr, temporal::TemporalExpr, utf8::Utf8Expr, +}; +use crate::{Expr, ExprRef, Operator}; + pub mod python; use python::PythonUDF; diff --git a/src/daft-dsl/src/functions/numeric/abs.rs b/src/daft-dsl/src/functions/numeric/abs.rs index 5dfe92a321..af8566960d 100644 --- a/src/daft-dsl/src/functions/numeric/abs.rs +++ b/src/daft-dsl/src/functions/numeric/abs.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct AbsEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/ceil.rs b/src/daft-dsl/src/functions/numeric/ceil.rs index 1be899a0b2..735ab91bc3 100644 --- a/src/daft-dsl/src/functions/numeric/ceil.rs +++ b/src/daft-dsl/src/functions/numeric/ceil.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct CeilEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/exp.rs b/src/daft-dsl/src/functions/numeric/exp.rs index 4e0b52eefe..bde9b90f6f 100644 --- a/src/daft-dsl/src/functions/numeric/exp.rs +++ b/src/daft-dsl/src/functions/numeric/exp.rs @@ -1,8 +1,10 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::{FunctionEvaluator, FunctionExpr}; -use crate::ExprRef; +use crate::{ + functions::{FunctionEvaluator, FunctionExpr}, + ExprRef, +}; pub(super) struct ExpEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/floor.rs b/src/daft-dsl/src/functions/numeric/floor.rs index 1435c5c674..a76de5fda9 100644 --- a/src/daft-dsl/src/functions/numeric/floor.rs +++ b/src/daft-dsl/src/functions/numeric/floor.rs @@ -2,8 +2,7 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; use super::super::FunctionEvaluator; -use crate::functions::FunctionExpr; -use crate::ExprRef; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct FloorEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/log.rs b/src/daft-dsl/src/functions/numeric/log.rs index acab14ce17..9c6105c449 100644 --- a/src/daft-dsl/src/functions/numeric/log.rs +++ b/src/daft-dsl/src/functions/numeric/log.rs @@ -1,11 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - -use super::super::FunctionEvaluator; -use super::NumericExpr; +use super::{super::FunctionEvaluator, NumericExpr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) enum LogFunction { Log2, diff --git a/src/daft-dsl/src/functions/numeric/mod.rs b/src/daft-dsl/src/functions/numeric/mod.rs index 08b357b42e..98ab6d3f32 100644 --- a/src/daft-dsl/src/functions/numeric/mod.rs +++ b/src/daft-dsl/src/functions/numeric/mod.rs @@ -8,6 +8,8 @@ mod sign; mod sqrt; mod trigonometry; +use std::hash::Hash; + use abs::AbsEvaluator; use ceil::CeilEvaluator; use common_hashable_float_wrapper::FloatWrapper; @@ -17,14 +19,16 @@ use round::RoundEvaluator; use serde::{Deserialize, Serialize}; use sign::SignEvaluator; use sqrt::SqrtEvaluator; -use std::hash::Hash; use trigonometry::Atan2Evaluator; -use crate::functions::numeric::exp::ExpEvaluator; -use crate::functions::numeric::trigonometry::{TrigonometricFunction, TrigonometryEvaluator}; -use crate::{Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{ + functions::numeric::{ + exp::ExpEvaluator, + trigonometry::{TrigonometricFunction, TrigonometryEvaluator}, + }, + Expr, ExprRef, +}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum NumericExpr { diff --git a/src/daft-dsl/src/functions/numeric/round.rs b/src/daft-dsl/src/functions/numeric/round.rs index 2724aa586c..aee8fa29c6 100644 --- a/src/daft-dsl/src/functions/numeric/round.rs +++ b/src/daft-dsl/src/functions/numeric/round.rs @@ -1,11 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use super::super::FunctionEvaluator; -use super::NumericExpr; -use crate::ExprRef; - -use crate::functions::FunctionExpr; +use super::{super::FunctionEvaluator, NumericExpr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct RoundEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/sign.rs b/src/daft-dsl/src/functions/numeric/sign.rs index 29dc883bb2..20dafba799 100644 --- a/src/daft-dsl/src/functions/numeric/sign.rs +++ b/src/daft-dsl/src/functions/numeric/sign.rs @@ -2,8 +2,7 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; use super::super::FunctionEvaluator; -use crate::functions::FunctionExpr; -use crate::ExprRef; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SignEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/sqrt.rs b/src/daft-dsl/src/functions/numeric/sqrt.rs index 6af0380490..248ce1de7e 100644 --- a/src/daft-dsl/src/functions/numeric/sqrt.rs +++ b/src/daft-dsl/src/functions/numeric/sqrt.rs @@ -2,9 +2,7 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; use super::super::FunctionEvaluator; -use crate::ExprRef; - -use crate::functions::FunctionExpr; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SqrtEvaluator {} diff --git a/src/daft-dsl/src/functions/numeric/trigonometry.rs b/src/daft-dsl/src/functions/numeric/trigonometry.rs index 7716e09db7..9779e802d4 100644 --- a/src/daft-dsl/src/functions/numeric/trigonometry.rs +++ b/src/daft-dsl/src/functions/numeric/trigonometry.rs @@ -2,8 +2,10 @@ use common_error::{DaftError, DaftResult}; pub use daft_core::array::ops::trigonometry::TrigonometricFunction; use daft_core::prelude::*; -use crate::functions::{FunctionEvaluator, FunctionExpr}; -use crate::ExprRef; +use crate::{ + functions::{FunctionEvaluator, FunctionExpr}, + ExprRef, +}; pub(super) struct TrigonometryEvaluator(pub TrigonometricFunction); diff --git a/src/daft-dsl/src/functions/partitioning/evaluators.rs b/src/daft-dsl/src/functions/partitioning/evaluators.rs index 0a7acc4ad7..a0622b40a4 100644 --- a/src/daft-dsl/src/functions/partitioning/evaluators.rs +++ b/src/daft-dsl/src/functions/partitioning/evaluators.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::{functions::partitioning::PartitioningExpr, ExprRef}; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::partitioning::PartitioningExpr, ExprRef}; macro_rules! impl_func_evaluator_for_partitioning { ($name:ident, $op:ident, $kernel:ident, $result_type:ident) => { @@ -54,8 +52,9 @@ macro_rules! impl_func_evaluator_for_partitioning { } }; } -use crate::functions::FunctionExpr; use DataType::{Date, Int32}; + +use crate::functions::FunctionExpr; impl_func_evaluator_for_partitioning!(YearsEvaluator, years, partitioning_years, Int32); impl_func_evaluator_for_partitioning!(MonthsEvaluator, months, partitioning_months, Int32); impl_func_evaluator_for_partitioning!(DaysEvaluator, days, partitioning_days, Date); diff --git a/src/daft-dsl/src/functions/partitioning/mod.rs b/src/daft-dsl/src/functions/partitioning/mod.rs index 5dfc88725d..9f37414e18 100644 --- a/src/daft-dsl/src/functions/partitioning/mod.rs +++ b/src/daft-dsl/src/functions/partitioning/mod.rs @@ -2,6 +2,7 @@ mod evaluators; use serde::{Deserialize, Serialize}; +use super::FunctionEvaluator; use crate::{ functions::partitioning::evaluators::{ DaysEvaluator, HoursEvaluator, IcebergBucketEvaluator, IcebergTruncateEvaluator, @@ -10,8 +11,6 @@ use crate::{ Expr, ExprRef, }; -use super::FunctionEvaluator; - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum PartitioningExpr { Years, diff --git a/src/daft-dsl/src/functions/python/mod.rs b/src/daft-dsl/src/functions/python/mod.rs index be63a474a5..d4690e3ec6 100644 --- a/src/daft-dsl/src/functions/python/mod.rs +++ b/src/daft-dsl/src/functions/python/mod.rs @@ -13,9 +13,8 @@ pub use runtime_py_object::RuntimePyObject; use serde::{Deserialize, Serialize}; pub use udf_runtime_binding::UDFRuntimeBinding; -use crate::{Expr, ExprRef}; - use super::{FunctionEvaluator, FunctionExpr}; +use crate::{Expr, ExprRef}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum PythonUDF { diff --git a/src/daft-dsl/src/functions/python/udf.rs b/src/daft-dsl/src/functions/python/udf.rs index e40efe0fce..7b9da47bf7 100644 --- a/src/daft-dsl/src/functions/python/udf.rs +++ b/src/daft-dsl/src/functions/python/udf.rs @@ -1,20 +1,13 @@ -use daft_core::datatypes::DataType; - +use common_error::{DaftError, DaftResult}; +use daft_core::{datatypes::DataType, prelude::*}; #[cfg(feature = "python")] use pyo3::{ types::{PyAnyMethods, PyModule}, Bound, PyAny, PyResult, }; -use daft_core::prelude::*; - -use crate::ExprRef; - -use common_error::{DaftError, DaftResult}; - -use super::super::FunctionEvaluator; -use super::{StatefulPythonUDF, StatelessPythonUDF}; -use crate::functions::FunctionExpr; +use super::{super::FunctionEvaluator, StatefulPythonUDF, StatelessPythonUDF}; +use crate::{functions::FunctionExpr, ExprRef}; impl FunctionEvaluator for StatelessPythonUDF { fn fn_name(&self) -> &'static str { @@ -63,8 +56,7 @@ fn run_udf( return_dtype: &DataType, batch_size: Option, ) -> DaftResult { - use daft_core::python::PyDataType; - use daft_core::python::PySeries; + use daft_core::python::{PyDataType, PySeries}; // Convert input Rust &[Series] to wrapped Python Vec> let py_series_module = PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))?; @@ -174,12 +166,13 @@ impl FunctionEvaluator for StatefulPythonUDF { #[cfg(feature = "python")] { - use crate::functions::python::udf_runtime_binding::UDFRuntimeBinding; use pyo3::{ types::{PyDict, PyTuple}, Python, }; + use crate::functions::python::udf_runtime_binding::UDFRuntimeBinding; + if inputs.len() != self.num_expressions { return Err(DaftError::SchemaMismatch(format!( "Number of inputs required by UDF {} does not match number of inputs provided: {}", diff --git a/src/daft-dsl/src/functions/scalar.rs b/src/daft-dsl/src/functions/scalar.rs index 4a77454f34..7e610ba6eb 100644 --- a/src/daft-dsl/src/functions/scalar.rs +++ b/src/daft-dsl/src/functions/scalar.rs @@ -1,14 +1,15 @@ -use std::any::Any; -use std::fmt::{Display, Formatter}; -use std::sync::Arc; +use std::{ + any::Any, + fmt::{Display, Formatter}, + sync::Arc, +}; use common_error::DaftResult; use daft_core::prelude::*; +use serde::{Deserialize, Serialize}; use crate::{Expr, ExprRef}; -use serde::{Deserialize, Serialize}; - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ScalarFunction { pub udf: Arc, diff --git a/src/daft-dsl/src/functions/sketch/mod.rs b/src/daft-dsl/src/functions/sketch/mod.rs index ae74ab25aa..87c5df6f6d 100644 --- a/src/daft-dsl/src/functions/sketch/mod.rs +++ b/src/daft-dsl/src/functions/sketch/mod.rs @@ -1,12 +1,10 @@ mod percentile; use percentile::PercentileEvaluator; - use serde::{Deserialize, Serialize}; -use crate::{Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{Expr, ExprRef}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct HashableVecPercentiles(pub Vec); diff --git a/src/daft-dsl/src/functions/sketch/percentile.rs b/src/daft-dsl/src/functions/sketch/percentile.rs index 6102bf1d15..65ee4a1e7c 100644 --- a/src/daft-dsl/src/functions/sketch/percentile.rs +++ b/src/daft-dsl/src/functions/sketch/percentile.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use super::super::FunctionEvaluator; -use super::SketchExpr; -use crate::functions::FunctionExpr; -use crate::ExprRef; +use super::{super::FunctionEvaluator, SketchExpr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct PercentileEvaluator {} diff --git a/src/daft-dsl/src/functions/struct_/get.rs b/src/daft-dsl/src/functions/struct_/get.rs index 859e438c67..5e39e83ab2 100644 --- a/src/daft-dsl/src/functions/struct_/get.rs +++ b/src/daft-dsl/src/functions/struct_/get.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, StructExpr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct GetEvaluator {} diff --git a/src/daft-dsl/src/functions/struct_/mod.rs b/src/daft-dsl/src/functions/struct_/mod.rs index 80b5c27221..c842c45c64 100644 --- a/src/daft-dsl/src/functions/struct_/mod.rs +++ b/src/daft-dsl/src/functions/struct_/mod.rs @@ -3,9 +3,8 @@ mod get; use get::GetEvaluator; use serde::{Deserialize, Serialize}; -use crate::{Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{Expr, ExprRef}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum StructExpr { diff --git a/src/daft-dsl/src/functions/temporal/date.rs b/src/daft-dsl/src/functions/temporal/date.rs index 9c149c3641..0d7b70ab83 100644 --- a/src/daft-dsl/src/functions/temporal/date.rs +++ b/src/daft-dsl/src/functions/temporal/date.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct DateEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/day.rs b/src/daft-dsl/src/functions/temporal/day.rs index 3f1856ef99..bb06d6f5fa 100644 --- a/src/daft-dsl/src/functions/temporal/day.rs +++ b/src/daft-dsl/src/functions/temporal/day.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct DayEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/day_of_week.rs b/src/daft-dsl/src/functions/temporal/day_of_week.rs index 6bf80932f1..23fe76947b 100644 --- a/src/daft-dsl/src/functions/temporal/day_of_week.rs +++ b/src/daft-dsl/src/functions/temporal/day_of_week.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct DayOfWeekEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/hour.rs b/src/daft-dsl/src/functions/temporal/hour.rs index 676632ad41..e3f775577a 100644 --- a/src/daft-dsl/src/functions/temporal/hour.rs +++ b/src/daft-dsl/src/functions/temporal/hour.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct HourEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/minute.rs b/src/daft-dsl/src/functions/temporal/minute.rs index 5e12127dde..ffdf5f29ea 100644 --- a/src/daft-dsl/src/functions/temporal/minute.rs +++ b/src/daft-dsl/src/functions/temporal/minute.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct MinuteEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/mod.rs b/src/daft-dsl/src/functions/temporal/mod.rs index ae598aa5e1..668d72f164 100644 --- a/src/daft-dsl/src/functions/temporal/mod.rs +++ b/src/daft-dsl/src/functions/temporal/mod.rs @@ -11,14 +11,16 @@ mod year; use serde::{Deserialize, Serialize}; -use crate::functions::temporal::{ - date::DateEvaluator, day::DayEvaluator, day_of_week::DayOfWeekEvaluator, hour::HourEvaluator, - minute::MinuteEvaluator, month::MonthEvaluator, second::SecondEvaluator, time::TimeEvaluator, - truncate::TruncateEvaluator, year::YearEvaluator, -}; -use crate::{Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{ + functions::temporal::{ + date::DateEvaluator, day::DayEvaluator, day_of_week::DayOfWeekEvaluator, + hour::HourEvaluator, minute::MinuteEvaluator, month::MonthEvaluator, + second::SecondEvaluator, time::TimeEvaluator, truncate::TruncateEvaluator, + year::YearEvaluator, + }, + Expr, ExprRef, +}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum TemporalExpr { diff --git a/src/daft-dsl/src/functions/temporal/month.rs b/src/daft-dsl/src/functions/temporal/month.rs index 37c5d79a21..b93af55090 100644 --- a/src/daft-dsl/src/functions/temporal/month.rs +++ b/src/daft-dsl/src/functions/temporal/month.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct MonthEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/second.rs b/src/daft-dsl/src/functions/temporal/second.rs index b87283c6d9..490803f5a2 100644 --- a/src/daft-dsl/src/functions/temporal/second.rs +++ b/src/daft-dsl/src/functions/temporal/second.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SecondEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/time.rs b/src/daft-dsl/src/functions/temporal/time.rs index 4da0da2d9e..32ff78e459 100644 --- a/src/daft-dsl/src/functions/temporal/time.rs +++ b/src/daft-dsl/src/functions/temporal/time.rs @@ -1,10 +1,8 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use crate::functions::FunctionExpr; -use crate::ExprRef; - use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct TimeEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/truncate.rs b/src/daft-dsl/src/functions/temporal/truncate.rs index 1e0aebcdf2..785486dc7f 100644 --- a/src/daft-dsl/src/functions/temporal/truncate.rs +++ b/src/daft-dsl/src/functions/temporal/truncate.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, TemporalExpr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct TruncateEvaluator {} diff --git a/src/daft-dsl/src/functions/temporal/year.rs b/src/daft-dsl/src/functions/temporal/year.rs index 6f289bb213..5557926a04 100644 --- a/src/daft-dsl/src/functions/temporal/year.rs +++ b/src/daft-dsl/src/functions/temporal/year.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct YearEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/capitalize.rs b/src/daft-dsl/src/functions/utf8/capitalize.rs index 2236df3e9b..caa3c25359 100644 --- a/src/daft-dsl/src/functions/utf8/capitalize.rs +++ b/src/daft-dsl/src/functions/utf8/capitalize.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct CapitalizeEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/contains.rs b/src/daft-dsl/src/functions/utf8/contains.rs index f14b78f719..8c63b17be3 100644 --- a/src/daft-dsl/src/functions/utf8/contains.rs +++ b/src/daft-dsl/src/functions/utf8/contains.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ContainsEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/endswith.rs b/src/daft-dsl/src/functions/utf8/endswith.rs index a05d2be776..5785f92257 100644 --- a/src/daft-dsl/src/functions/utf8/endswith.rs +++ b/src/daft-dsl/src/functions/utf8/endswith.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct EndswithEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/extract.rs b/src/daft-dsl/src/functions/utf8/extract.rs index d6da24a07d..abe9d4df16 100644 --- a/src/daft-dsl/src/functions/utf8/extract.rs +++ b/src/daft-dsl/src/functions/utf8/extract.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ExtractEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/extract_all.rs b/src/daft-dsl/src/functions/utf8/extract_all.rs index d96bef730a..e2395e8c19 100644 --- a/src/daft-dsl/src/functions/utf8/extract_all.rs +++ b/src/daft-dsl/src/functions/utf8/extract_all.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ExtractAllEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/find.rs b/src/daft-dsl/src/functions/utf8/find.rs index e74d2ab021..d184d17c5f 100644 --- a/src/daft-dsl/src/functions/utf8/find.rs +++ b/src/daft-dsl/src/functions/utf8/find.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct FindEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/ilike.rs b/src/daft-dsl/src/functions/utf8/ilike.rs index 51b5a44951..35c0ce1e20 100644 --- a/src/daft-dsl/src/functions/utf8/ilike.rs +++ b/src/daft-dsl/src/functions/utf8/ilike.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct IlikeEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/left.rs b/src/daft-dsl/src/functions/utf8/left.rs index b62f2226b1..ffde503901 100644 --- a/src/daft-dsl/src/functions/utf8/left.rs +++ b/src/daft-dsl/src/functions/utf8/left.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LeftEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/length.rs b/src/daft-dsl/src/functions/utf8/length.rs index f8e08cae56..9f4729ac76 100644 --- a/src/daft-dsl/src/functions/utf8/length.rs +++ b/src/daft-dsl/src/functions/utf8/length.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LengthEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/length_bytes.rs b/src/daft-dsl/src/functions/utf8/length_bytes.rs index ec6532edc8..cdf0af383a 100644 --- a/src/daft-dsl/src/functions/utf8/length_bytes.rs +++ b/src/daft-dsl/src/functions/utf8/length_bytes.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LengthBytesEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/like.rs b/src/daft-dsl/src/functions/utf8/like.rs index 3263a3d72c..a2a2a96def 100644 --- a/src/daft-dsl/src/functions/utf8/like.rs +++ b/src/daft-dsl/src/functions/utf8/like.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LikeEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/lower.rs b/src/daft-dsl/src/functions/utf8/lower.rs index 71574590a4..f3fd7a8c47 100644 --- a/src/daft-dsl/src/functions/utf8/lower.rs +++ b/src/daft-dsl/src/functions/utf8/lower.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LowerEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/lpad.rs b/src/daft-dsl/src/functions/utf8/lpad.rs index 2fcf3bad2e..9880568aed 100644 --- a/src/daft-dsl/src/functions/utf8/lpad.rs +++ b/src/daft-dsl/src/functions/utf8/lpad.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LpadEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/lstrip.rs b/src/daft-dsl/src/functions/utf8/lstrip.rs index 42b295c526..534aa1cd37 100644 --- a/src/daft-dsl/src/functions/utf8/lstrip.rs +++ b/src/daft-dsl/src/functions/utf8/lstrip.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct LstripEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/match_.rs b/src/daft-dsl/src/functions/utf8/match_.rs index 9f703f5f38..7455aca17c 100644 --- a/src/daft-dsl/src/functions/utf8/match_.rs +++ b/src/daft-dsl/src/functions/utf8/match_.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct MatchEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/mod.rs b/src/daft-dsl/src/functions/utf8/mod.rs index 13b1dd10c0..cb3a07aca1 100644 --- a/src/daft-dsl/src/functions/utf8/mod.rs +++ b/src/daft-dsl/src/functions/utf8/mod.rs @@ -57,9 +57,8 @@ use to_date::ToDateEvaluator; use to_datetime::ToDatetimeEvaluator; use upper::UpperEvaluator; -use crate::{functions::utf8::match_::MatchEvaluator, Expr, ExprRef}; - use super::FunctionEvaluator; +use crate::{functions::utf8::match_::MatchEvaluator, Expr, ExprRef}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum Utf8Expr { diff --git a/src/daft-dsl/src/functions/utf8/normalize.rs b/src/daft-dsl/src/functions/utf8/normalize.rs index 837e28cf95..b693e2c017 100644 --- a/src/daft-dsl/src/functions/utf8/normalize.rs +++ b/src/daft-dsl/src/functions/utf8/normalize.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct NormalizeEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/repeat.rs b/src/daft-dsl/src/functions/utf8/repeat.rs index 8d523f7183..c321a6920a 100644 --- a/src/daft-dsl/src/functions/utf8/repeat.rs +++ b/src/daft-dsl/src/functions/utf8/repeat.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct RepeatEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/replace.rs b/src/daft-dsl/src/functions/utf8/replace.rs index da5b791bed..022f98ac17 100644 --- a/src/daft-dsl/src/functions/utf8/replace.rs +++ b/src/daft-dsl/src/functions/utf8/replace.rs @@ -1,11 +1,8 @@ -use daft_core::prelude::*; - -use crate::ExprRef; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ReplaceEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/reverse.rs b/src/daft-dsl/src/functions/utf8/reverse.rs index e544f8a15f..cff9363a82 100644 --- a/src/daft-dsl/src/functions/utf8/reverse.rs +++ b/src/daft-dsl/src/functions/utf8/reverse.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ReverseEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/right.rs b/src/daft-dsl/src/functions/utf8/right.rs index 0e4c1b5058..892c0f7341 100644 --- a/src/daft-dsl/src/functions/utf8/right.rs +++ b/src/daft-dsl/src/functions/utf8/right.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct RightEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/rpad.rs b/src/daft-dsl/src/functions/utf8/rpad.rs index ef5d115c3d..f7c0769fac 100644 --- a/src/daft-dsl/src/functions/utf8/rpad.rs +++ b/src/daft-dsl/src/functions/utf8/rpad.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct RpadEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/rstrip.rs b/src/daft-dsl/src/functions/utf8/rstrip.rs index b0a87c3591..c138d4c86c 100644 --- a/src/daft-dsl/src/functions/utf8/rstrip.rs +++ b/src/daft-dsl/src/functions/utf8/rstrip.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct RstripEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/split.rs b/src/daft-dsl/src/functions/utf8/split.rs index 95787bd933..0518786055 100644 --- a/src/daft-dsl/src/functions/utf8/split.rs +++ b/src/daft-dsl/src/functions/utf8/split.rs @@ -1,9 +1,8 @@ -use crate::{functions::FunctionExpr, ExprRef}; -use daft_core::prelude::*; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SplitEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/startswith.rs b/src/daft-dsl/src/functions/utf8/startswith.rs index 1fe6f5e833..01ae5eda7e 100644 --- a/src/daft-dsl/src/functions/utf8/startswith.rs +++ b/src/daft-dsl/src/functions/utf8/startswith.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct StartswithEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/substr.rs b/src/daft-dsl/src/functions/utf8/substr.rs index c03423a87c..d2ec60256a 100644 --- a/src/daft-dsl/src/functions/utf8/substr.rs +++ b/src/daft-dsl/src/functions/utf8/substr.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct SubstrEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/to_date.rs b/src/daft-dsl/src/functions/utf8/to_date.rs index 942af3bdc8..58adecbc05 100644 --- a/src/daft-dsl/src/functions/utf8/to_date.rs +++ b/src/daft-dsl/src/functions/utf8/to_date.rs @@ -1,10 +1,8 @@ -use crate::ExprRef; -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ToDateEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/to_datetime.rs b/src/daft-dsl/src/functions/utf8/to_datetime.rs index b92a58aaa9..25368c8e64 100644 --- a/src/daft-dsl/src/functions/utf8/to_datetime.rs +++ b/src/daft-dsl/src/functions/utf8/to_datetime.rs @@ -1,11 +1,8 @@ -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; -use daft_core::prelude::*; - -use daft_core::datatypes::infer_timeunit_from_format_string; +use daft_core::{datatypes::infer_timeunit_from_format_string, prelude::*}; use super::{super::FunctionEvaluator, Utf8Expr}; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct ToDatetimeEvaluator {} diff --git a/src/daft-dsl/src/functions/utf8/upper.rs b/src/daft-dsl/src/functions/utf8/upper.rs index 61d0cf2943..a02438b495 100644 --- a/src/daft-dsl/src/functions/utf8/upper.rs +++ b/src/daft-dsl/src/functions/utf8/upper.rs @@ -1,10 +1,8 @@ -use daft_core::prelude::*; - -use crate::functions::FunctionExpr; -use crate::ExprRef; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; +use crate::{functions::FunctionExpr, ExprRef}; pub(super) struct UpperEvaluator {} diff --git a/src/daft-dsl/src/join.rs b/src/daft-dsl/src/join.rs index 29ac157151..2f1cf96cb2 100644 --- a/src/daft-dsl/src/join.rs +++ b/src/daft-dsl/src/join.rs @@ -82,9 +82,8 @@ pub fn infer_join_schema( #[cfg(test)] mod tests { - use crate::col; - use super::*; + use crate::col; #[test] fn test_get_common_join_keys() { diff --git a/src/daft-dsl/src/lit.rs b/src/daft-dsl/src/lit.rs index b85d8804af..065c2a4d4e 100644 --- a/src/daft-dsl/src/lit.rs +++ b/src/daft-dsl/src/lit.rs @@ -1,5 +1,9 @@ -use crate::expr::Expr; -use crate::ExprRef; +use std::{ + fmt::{Display, Formatter, Result}, + hash::{Hash, Hasher}, + io::{self, Write}, + sync::Arc, +}; use common_error::{DaftError, DaftResult}; use common_hashable_float_wrapper::FloatWrapper; @@ -11,15 +15,10 @@ use daft_core::{ }, }; use serde::{Deserialize, Serialize}; -use std::io::{self, Write}; -use std::sync::Arc; -use std::{ - fmt::{Display, Formatter, Result}, - hash::{Hash, Hasher}, -}; #[cfg(feature = "python")] use crate::pyobj_serde::PyObjectWrapper; +use crate::{expr::Expr, ExprRef}; /// Stores a literal value for queries and computations. /// We only need to support the limited types below since those are the types that we would get from python. @@ -369,8 +368,7 @@ pub fn null_lit() -> ExprRef { /// Convert a slice of literals to a series. /// This function will return an error if the literals are not all the same type pub fn literals_to_series(values: &[LiteralValue]) -> DaftResult { - use daft_core::datatypes::*; - use daft_core::series::IntoSeries; + use daft_core::{datatypes::*, series::IntoSeries}; let dtype = values[0].get_type(); diff --git a/src/daft-dsl/src/optimization.rs b/src/daft-dsl/src/optimization.rs index 1fa295a1dd..adafd94b78 100644 --- a/src/daft-dsl/src/optimization.rs +++ b/src/daft-dsl/src/optimization.rs @@ -2,9 +2,8 @@ use std::collections::HashMap; use common_treenode::{Transformed, TreeNode, TreeNodeRecursion}; -use crate::{ExprRef, Operator}; - use super::expr::Expr; +use crate::{ExprRef, Operator}; pub fn get_required_columns(e: &ExprRef) -> Vec { let mut cols = vec![]; diff --git a/src/daft-dsl/src/python.rs b/src/daft-dsl/src/python.rs index bebad3d766..3aacabb6f1 100644 --- a/src/daft-dsl/src/python.rs +++ b/src/daft-dsl/src/python.rs @@ -1,26 +1,26 @@ -use std::collections::hash_map::DefaultHasher; - -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; +use std::{ + collections::{hash_map::DefaultHasher, HashMap}, + hash::{Hash, Hasher}, + sync::Arc, +}; use common_error::DaftError; use common_py_serde::impl_bincode_py_state_serialization; use common_resource_request::ResourceRequest; -use daft_core::array::ops::Utf8NormalizeOptions; -use daft_core::python::PySeries; -use daft_core::python::{PyDataType, PyField, PySchema, PyTimeUnit}; -use serde::{Deserialize, Serialize}; - -use crate::{functions, Expr, ExprRef, LiteralValue}; -use daft_core::prelude::*; - +use daft_core::{ + array::ops::Utf8NormalizeOptions, + prelude::*, + python::{PyDataType, PyField, PySchema, PySeries, PyTimeUnit}, +}; use pyo3::{ exceptions::PyValueError, prelude::*, pyclass::CompareOp, types::{PyBool, PyBytes, PyFloat, PyInt, PyString}, }; +use serde::{Deserialize, Serialize}; + +use crate::{functions, Expr, ExprRef, LiteralValue}; #[pyfunction] pub fn col(name: &str) -> PyResult { diff --git a/src/daft-dsl/src/resolve_expr.rs b/src/daft-dsl/src/resolve_expr.rs index 0fb45f2f24..7c8b1820c7 100644 --- a/src/daft-dsl/src/resolve_expr.rs +++ b/src/daft-dsl/src/resolve_expr.rs @@ -1,15 +1,15 @@ -use crate::{col, AggExpr, ApproxPercentileParams, Expr, ExprRef}; -use crate::{expr::has_agg, has_stateful_udf}; -use common_error::{DaftError, DaftResult}; -use common_treenode::{Transformed, TransformedResult, TreeNode}; -use daft_core::prelude::*; - use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, sync::Arc, }; +use common_error::{DaftError, DaftResult}; +use common_treenode::{Transformed, TransformedResult, TreeNode}; +use daft_core::prelude::*; + +use crate::{col, expr::has_agg, has_stateful_udf, AggExpr, ApproxPercentileParams, Expr, ExprRef}; + // Calculates all the possible struct get expressions in a schema. // For each sugared string, calculates all possible corresponding expressions, in order of priority. fn calculate_struct_expr_map(schema: &Schema) -> HashMap> { diff --git a/src/daft-functions-json/src/expr.rs b/src/daft-functions-json/src/expr.rs index 232f6f28c4..712a1a7433 100644 --- a/src/daft-functions-json/src/expr.rs +++ b/src/daft-functions-json/src/expr.rs @@ -1,8 +1,7 @@ -use daft_core::prelude::*; -use daft_dsl::{functions::ScalarUDF, ExprRef}; - // use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; +use daft_dsl::{functions::ScalarUDF, ExprRef}; use serde::{Deserialize, Serialize}; use crate::json_query_series; diff --git a/src/daft-functions/src/count_matches.rs b/src/daft-functions/src/count_matches.rs index a4861b4a99..89df9274a9 100644 --- a/src/daft-functions/src/count_matches.rs +++ b/src/daft-functions/src/count_matches.rs @@ -1,5 +1,4 @@ use common_error::{DaftError, DaftResult}; - use daft_core::prelude::*; use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, diff --git a/src/daft-functions/src/distance/cosine.rs b/src/daft-functions/src/distance/cosine.rs index 40e2302ad6..c7065ff655 100644 --- a/src/daft-functions/src/distance/cosine.rs +++ b/src/daft-functions/src/distance/cosine.rs @@ -1,6 +1,5 @@ use common_error::{DaftError, DaftResult}; use daft_core::{datatypes::NumericNative, prelude::*}; - use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, ExprRef, diff --git a/src/daft-functions/src/float/mod.rs b/src/daft-functions/src/float/mod.rs index 82f348a736..a1aefd9f92 100644 --- a/src/daft-functions/src/float/mod.rs +++ b/src/daft-functions/src/float/mod.rs @@ -7,7 +7,6 @@ pub use fill_nan::fill_nan; pub use is_inf::is_inf; pub use is_nan::is_nan; pub use not_nan::not_nan; - #[cfg(feature = "python")] use pyo3::prelude::*; diff --git a/src/daft-functions/src/hash.rs b/src/daft-functions/src/hash.rs index a584c6e51f..d21f49768a 100644 --- a/src/daft-functions/src/hash.rs +++ b/src/daft-functions/src/hash.rs @@ -1,6 +1,5 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; - use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, ExprRef, diff --git a/src/daft-functions/src/image/crop.rs b/src/daft-functions/src/image/crop.rs index c350914340..ee464c2ca9 100644 --- a/src/daft-functions/src/image/crop.rs +++ b/src/daft-functions/src/image/crop.rs @@ -1,9 +1,9 @@ -use common_error::DaftError; +use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; - -use common_error::DaftResult; -use daft_dsl::functions::{ScalarFunction, ScalarUDF}; -use daft_dsl::ExprRef; +use daft_dsl::{ + functions::{ScalarFunction, ScalarUDF}, + ExprRef, +}; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] diff --git a/src/daft-functions/src/image/decode.rs b/src/daft-functions/src/image/decode.rs index 72173b16d6..99aabbee8a 100644 --- a/src/daft-functions/src/image/decode.rs +++ b/src/daft-functions/src/image/decode.rs @@ -1,6 +1,5 @@ -use daft_core::prelude::*; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, ExprRef, diff --git a/src/daft-functions/src/image/encode.rs b/src/daft-functions/src/image/encode.rs index 5c465ccbb4..f1a5bfaea4 100644 --- a/src/daft-functions/src/image/encode.rs +++ b/src/daft-functions/src/image/encode.rs @@ -1,6 +1,5 @@ -use daft_core::prelude::*; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, ExprRef, diff --git a/src/daft-functions/src/image/resize.rs b/src/daft-functions/src/image/resize.rs index ce493f91ac..cac9fd7cf1 100644 --- a/src/daft-functions/src/image/resize.rs +++ b/src/daft-functions/src/image/resize.rs @@ -1,9 +1,9 @@ -use common_error::DaftError; +use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; - -use common_error::DaftResult; -use daft_dsl::functions::{ScalarFunction, ScalarUDF}; -use daft_dsl::ExprRef; +use daft_dsl::{ + functions::{ScalarFunction, ScalarUDF}, + ExprRef, +}; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] diff --git a/src/daft-functions/src/image/to_mode.rs b/src/daft-functions/src/image/to_mode.rs index 349b56304a..5d46a376dd 100644 --- a/src/daft-functions/src/image/to_mode.rs +++ b/src/daft-functions/src/image/to_mode.rs @@ -1,6 +1,5 @@ -use daft_core::prelude::*; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, ExprRef, diff --git a/src/daft-functions/src/list_sort.rs b/src/daft-functions/src/list_sort.rs index ce94651173..34037f5a43 100644 --- a/src/daft-functions/src/list_sort.rs +++ b/src/daft-functions/src/list_sort.rs @@ -1,5 +1,4 @@ use common_error::{DaftError, DaftResult}; - use daft_core::prelude::*; use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, diff --git a/src/daft-functions/src/to_struct.rs b/src/daft-functions/src/to_struct.rs index bf4b0480ad..73f390eb26 100644 --- a/src/daft-functions/src/to_struct.rs +++ b/src/daft-functions/src/to_struct.rs @@ -1,6 +1,5 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; - use daft_dsl::{ functions::{ScalarFunction, ScalarUDF}, ExprRef, diff --git a/src/daft-functions/src/tokenize/bpe.rs b/src/daft-functions/src/tokenize/bpe.rs index 839ddda2b1..18307092b4 100644 --- a/src/daft-functions/src/tokenize/bpe.rs +++ b/src/daft-functions/src/tokenize/bpe.rs @@ -9,8 +9,7 @@ use std::{ use base64::{engine::general_purpose, DecodeError, Engine}; use common_error::{DaftError, DaftResult}; use daft_io::{get_io_client, get_runtime, IOConfig}; -use snafu::prelude::*; -use snafu::Snafu; +use snafu::{prelude::*, Snafu}; use tiktoken_rs::CoreBPE; use super::special_tokens::get_special_tokens; diff --git a/src/daft-functions/src/tokenize/decode.rs b/src/daft-functions/src/tokenize/decode.rs index a0b02d1231..30a713f993 100644 --- a/src/daft-functions/src/tokenize/decode.rs +++ b/src/daft-functions/src/tokenize/decode.rs @@ -1,9 +1,7 @@ use std::sync::Arc; use common_error::{DaftError, DaftResult}; - use daft_core::prelude::*; - use daft_dsl::{functions::ScalarUDF, ExprRef}; use daft_io::IOConfig; use serde::{Deserialize, Serialize}; diff --git a/src/daft-functions/src/tokenize/encode.rs b/src/daft-functions/src/tokenize/encode.rs index 1610444e10..e36f9be4d2 100644 --- a/src/daft-functions/src/tokenize/encode.rs +++ b/src/daft-functions/src/tokenize/encode.rs @@ -5,7 +5,6 @@ use arrow2::{ offset::OffsetsBuffer, }; use common_error::{DaftError, DaftResult}; - use daft_core::prelude::*; use daft_dsl::{functions::ScalarUDF, ExprRef}; use daft_io::IOConfig; diff --git a/src/daft-functions/src/uri/download.rs b/src/daft-functions/src/uri/download.rs index 7b1e0d3145..4bacbdb3a9 100644 --- a/src/daft-functions/src/uri/download.rs +++ b/src/daft-functions/src/uri/download.rs @@ -1,15 +1,13 @@ use std::sync::Arc; +use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use daft_dsl::functions::ScalarUDF; -use daft_dsl::ExprRef; +use daft_dsl::{functions::ScalarUDF, ExprRef}; use daft_io::{get_io_client, get_runtime, Error, IOConfig, IOStatsContext, IOStatsRef}; use futures::{StreamExt, TryStreamExt}; use serde::Serialize; use snafu::prelude::*; -use common_error::{DaftError, DaftResult}; - use crate::InvalidArgumentSnafu; #[derive(Debug, Clone, Serialize, serde::Deserialize, PartialEq, Eq, Hash)] diff --git a/src/daft-functions/src/uri/upload.rs b/src/daft-functions/src/uri/upload.rs index 887be0af7b..14f7a3721c 100644 --- a/src/daft-functions/src/uri/upload.rs +++ b/src/daft-functions/src/uri/upload.rs @@ -1,15 +1,12 @@ use std::sync::Arc; +use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; - -use daft_dsl::functions::ScalarUDF; -use daft_dsl::ExprRef; +use daft_dsl::{functions::ScalarUDF, ExprRef}; use daft_io::{get_io_client, get_runtime, IOConfig, IOStatsRef, SourceType}; use futures::{StreamExt, TryStreamExt}; use serde::Serialize; -use common_error::{DaftError, DaftResult}; - #[derive(Debug, Clone, Serialize, serde::Deserialize, PartialEq, Eq, Hash)] pub(super) struct UploadFunction { pub(super) location: String, diff --git a/src/daft-image/src/image_buffer.rs b/src/daft-image/src/image_buffer.rs index 0855fd4ec8..18db67e0df 100644 --- a/src/daft-image/src/image_buffer.rs +++ b/src/daft-image/src/image_buffer.rs @@ -1,11 +1,12 @@ +use std::{ + borrow::Cow, + io::{Seek, Write}, + ops::Deref, +}; + use common_error::{DaftError, DaftResult}; -use daft_core::array::image_array::BBox; -use daft_core::datatypes::prelude::*; -use image::{ColorType, DynamicImage, ImageBuffer}; -use image::{Luma, LumaA, Rgb, Rgba}; -use std::borrow::Cow; -use std::io::{Seek, Write}; -use std::ops::Deref; +use daft_core::{array::image_array::BBox, datatypes::prelude::*}; +use image::{ColorType, DynamicImage, ImageBuffer, Luma, LumaA, Rgb, Rgba}; #[allow(clippy::upper_case_acronyms, dead_code)] #[derive(Debug)] diff --git a/src/daft-image/src/ops.rs b/src/daft-image/src/ops.rs index c949df9463..6a3e2a6a75 100644 --- a/src/daft-image/src/ops.rs +++ b/src/daft-image/src/ops.rs @@ -1,13 +1,18 @@ -use crate::{iters::*, CountingWriter, DaftImageBuffer}; +use std::{borrow::Cow, sync::Arc}; + use base64::Engine; use common_error::{DaftError, DaftResult}; -use daft_core::array::image_array::{BBox, ImageArraySidecarData}; -use daft_core::array::prelude::*; -use daft_core::datatypes::prelude::*; -use daft_core::prelude::ImageArray; +use daft_core::{ + array::{ + image_array::{BBox, ImageArraySidecarData}, + prelude::*, + }, + datatypes::prelude::*, + prelude::ImageArray, +}; use num_traits::FromPrimitive; -use std::borrow::Cow; -use std::sync::Arc; + +use crate::{iters::*, CountingWriter, DaftImageBuffer}; #[allow(clippy::len_without_is_empty)] pub trait AsImageObj { diff --git a/src/daft-image/src/series.rs b/src/daft-image/src/series.rs index 7fe2255b7e..636353768e 100644 --- a/src/daft-image/src/series.rs +++ b/src/daft-image/src/series.rs @@ -1,6 +1,5 @@ -use daft_core::prelude::*; - use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use crate::{ ops::{image_array_from_img_buffers, ImageOps}, diff --git a/src/daft-io/src/azure_blob.rs b/src/daft-io/src/azure_blob.rs index 0a12dc704c..de2d05bd5d 100644 --- a/src/daft-io/src/azure_blob.rs +++ b/src/daft-io/src/azure_blob.rs @@ -1,3 +1,5 @@ +use std::{ops::Range, sync::Arc}; + use async_trait::async_trait; use azure_core::{auth::TokenCredential, new_http_client}; use azure_identity::{ClientSecretCredential, DefaultAzureCredential}; @@ -7,9 +9,9 @@ use azure_storage_blobs::{ container::{operations::BlobItem, Container}, prelude::*, }; +use common_io_config::AzureConfig; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use snafu::{IntoError, ResultExt, Snafu}; -use std::{ops::Range, sync::Arc}; use crate::{ object_io::{FileMetadata, FileType, LSResult, ObjectSource}, @@ -17,7 +19,6 @@ use crate::{ stream_utils::io_stats_on_bytestream, FileFormat, GetResult, }; -use common_io_config::AzureConfig; const AZURE_DELIMITER: &str = "/"; const DEFAULT_GLOB_FANOUT_LIMIT: usize = 1024; diff --git a/src/daft-io/src/google_cloud.rs b/src/daft-io/src/google_cloud.rs index e9d9b2f9d1..5435fedeb2 100644 --- a/src/daft-io/src/google_cloud.rs +++ b/src/daft-io/src/google_cloud.rs @@ -1,31 +1,24 @@ -use std::ops::Range; -use std::sync::Arc; - -use futures::stream::BoxStream; -use futures::TryStreamExt; -use google_cloud_storage::client::google_cloud_auth::credentials::CredentialsFile; -use google_cloud_storage::client::ClientConfig; -use google_cloud_token::{TokenSource, TokenSourceProvider}; +use std::{ops::Range, sync::Arc}; use async_trait::async_trait; -use google_cloud_storage::client::Client; -use google_cloud_storage::http::objects::get::GetObjectRequest; - -use google_cloud_storage::http::objects::list::ListObjectsRequest; -use google_cloud_storage::http::Error as GError; -use snafu::IntoError; -use snafu::ResultExt; -use snafu::Snafu; - -use crate::object_io::FileMetadata; -use crate::object_io::FileType; -use crate::object_io::LSResult; -use crate::object_io::ObjectSource; -use crate::stats::IOStatsRef; -use crate::stream_utils::io_stats_on_bytestream; -use crate::FileFormat; -use crate::GetResult; use common_io_config::GCSConfig; +use futures::{stream::BoxStream, TryStreamExt}; +use google_cloud_storage::{ + client::{google_cloud_auth::credentials::CredentialsFile, Client, ClientConfig}, + http::{ + objects::{get::GetObjectRequest, list::ListObjectsRequest}, + Error as GError, + }, +}; +use google_cloud_token::{TokenSource, TokenSourceProvider}; +use snafu::{IntoError, ResultExt, Snafu}; + +use crate::{ + object_io::{FileMetadata, FileType, LSResult, ObjectSource}, + stats::IOStatsRef, + stream_utils::io_stats_on_bytestream, + FileFormat, GetResult, +}; const GCS_DELIMITER: &str = "/"; const GCS_SCHEME: &str = "gs"; diff --git a/src/daft-io/src/http.rs b/src/daft-io/src/http.rs index aa6deb050f..1f3a3bef11 100644 --- a/src/daft-io/src/http.rs +++ b/src/daft-io/src/http.rs @@ -3,7 +3,6 @@ use std::{num::ParseIntError, ops::Range, string::FromUtf8Error, sync::Arc}; use async_trait::async_trait; use common_io_config::HTTPConfig; use futures::{stream::BoxStream, TryStreamExt}; - use hyper::header; use lazy_static::lazy_static; use regex::Regex; @@ -11,6 +10,7 @@ use reqwest::header::{CONTENT_LENGTH, RANGE}; use snafu::{IntoError, ResultExt, Snafu}; use url::Position; +use super::object_io::{GetResult, ObjectSource}; use crate::{ object_io::{FileMetadata, FileType, LSResult}, stats::IOStatsRef, @@ -18,8 +18,6 @@ use crate::{ FileFormat, }; -use super::object_io::{GetResult, ObjectSource}; - const HTTP_DELIMITER: &str = "/"; lazy_static! { @@ -353,9 +351,7 @@ mod tests { use std::default; - use crate::object_io::ObjectSource; - use crate::HttpSource; - use crate::Result; + use crate::{object_io::ObjectSource, HttpSource, Result}; #[tokio::test] async fn test_full_get_from_http() -> Result<()> { diff --git a/src/daft-io/src/huggingface.rs b/src/daft-io/src/huggingface.rs index 9e89327c80..3aacea186d 100644 --- a/src/daft-io/src/huggingface.rs +++ b/src/daft-io/src/huggingface.rs @@ -9,14 +9,15 @@ use futures::{ stream::{self, BoxStream}, StreamExt, TryStreamExt, }; - use hyper::header; use reqwest::{ header::{CONTENT_LENGTH, RANGE}, Client, }; +use serde::{Deserialize, Serialize}; use snafu::{IntoError, ResultExt, Snafu}; +use super::object_io::{GetResult, ObjectSource}; use crate::{ http::HttpSource, object_io::{FileMetadata, FileType, LSResult}, @@ -24,9 +25,6 @@ use crate::{ stream_utils::io_stats_on_bytestream, FileFormat, }; -use serde::{Deserialize, Serialize}; - -use super::object_io::{GetResult, ObjectSource}; #[derive(Debug, Snafu)] enum Error { diff --git a/src/daft-io/src/lib.rs b/src/daft-io/src/lib.rs index 84e57790ad..f5c5834541 100644 --- a/src/daft-io/src/lib.rs +++ b/src/daft-io/src/lib.rs @@ -21,32 +21,29 @@ use lazy_static::lazy_static; #[cfg(feature = "python")] pub mod python; +use std::{ + borrow::Cow, + collections::HashMap, + future::Future, + hash::Hash, + ops::Range, + panic::AssertUnwindSafe, + sync::{Arc, OnceLock}, +}; + +use common_error::{DaftError, DaftResult}; pub use common_io_config::{AzureConfig, IOConfig, S3Config}; -pub use object_io::FileMetadata; -pub use object_io::GetResult; +use futures::stream::BoxStream; use object_io::StreamingRetryParams; +pub use object_io::{FileMetadata, GetResult}; #[cfg(feature = "python")] pub use python::register_modules; +use s3_like::S3LikeSource; +use snafu::{prelude::*, Snafu}; pub use stats::{IOStatsContext, IOStatsRef}; -use tokio::runtime::RuntimeFlavor; -use tokio::task::JoinHandle; - -use std::future::Future; -use std::panic::AssertUnwindSafe; -use std::sync::Arc; -use std::sync::OnceLock; -use std::{borrow::Cow, collections::HashMap, hash::Hash, ops::Range}; - -use futures::stream::BoxStream; - -use snafu::Snafu; +use tokio::{runtime::RuntimeFlavor, task::JoinHandle}; use url::ParseError; -use snafu::prelude::*; - -use common_error::{DaftError, DaftResult}; -use s3_like::S3LikeSource; - use self::{http::HttpSource, local::LocalSource, object_io::ObjectSource}; #[derive(Debug, Snafu)] diff --git a/src/daft-io/src/local.rs b/src/daft-io/src/local.rs index d468faa798..fb32d65b27 100644 --- a/src/daft-io/src/local.rs +++ b/src/daft-io/src/local.rs @@ -1,23 +1,27 @@ -use std::io::{SeekFrom, Write}; -use std::ops::Range; -use std::path::PathBuf; +use std::{ + io::{SeekFrom, Write}, + ops::Range, + path::PathBuf, + sync::Arc, +}; -use crate::object_io::{self, FileMetadata, LSResult}; -use crate::stats::IOStatsRef; -use crate::FileFormat; - -use super::object_io::{GetResult, ObjectSource}; -use super::Result; use async_trait::async_trait; use bytes::Bytes; use common_error::DaftError; -use futures::stream::BoxStream; -use futures::StreamExt; -use futures::TryStreamExt; +use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use snafu::{ResultExt, Snafu}; -use std::sync::Arc; use tokio::io::{AsyncReadExt, AsyncSeekExt}; +use super::{ + object_io::{GetResult, ObjectSource}, + Result, +}; +use crate::{ + object_io::{self, FileMetadata, LSResult}, + stats::IOStatsRef, + FileFormat, +}; + /// NOTE: We hardcode this even for Windows /// /// For the most part, Windows machines work quite well with POSIX-style paths @@ -371,12 +375,12 @@ pub(crate) async fn collect_file(local_file: LocalFile) -> Result { #[cfg(test)] mod tests { - use std::default; - use std::io::Write; + use std::{default, io::Write}; - use crate::object_io::{FileMetadata, FileType, ObjectSource}; - use crate::Result; - use crate::{HttpSource, LocalSource}; + use crate::{ + object_io::{FileMetadata, FileType, ObjectSource}, + HttpSource, LocalSource, Result, + }; async fn write_remote_parquet_to_local_file( f: &mut tempfile::NamedTempFile, diff --git a/src/daft-io/src/object_io.rs b/src/daft-io/src/object_io.rs index 7a51ceb4b7..d3fa97601a 100644 --- a/src/daft-io/src/object_io.rs +++ b/src/daft-io/src/object_io.rs @@ -1,18 +1,19 @@ -use std::ops::Range; -use std::sync::Arc; -use std::time::Duration; +use std::{ops::Range, sync::Arc, time::Duration}; use async_trait::async_trait; use bytes::Bytes; use common_error::DaftError; -use futures::stream::{BoxStream, Stream}; -use futures::StreamExt; - +use futures::{ + stream::{BoxStream, Stream}, + StreamExt, +}; use tokio::sync::OwnedSemaphorePermit; -use crate::local::{collect_file, LocalFile}; -use crate::stats::IOStatsRef; -use crate::FileFormat; +use crate::{ + local::{collect_file, LocalFile}, + stats::IOStatsRef, + FileFormat, +}; pub struct StreamingRetryParams { source: Arc, diff --git a/src/daft-io/src/object_store_glob.rs b/src/daft-io/src/object_store_glob.rs index f5d874998e..58261d5fbf 100644 --- a/src/daft-io/src/object_store_glob.rs +++ b/src/daft-io/src/object_store_glob.rs @@ -1,11 +1,11 @@ -use async_stream::stream; -use futures::stream::{BoxStream, StreamExt}; -use itertools::Itertools; use std::{collections::HashSet, path::Path, sync::Arc}; -use tokio::sync::mpsc::Sender; +use async_stream::stream; +use futures::stream::{BoxStream, StreamExt}; use globset::{GlobBuilder, GlobMatcher}; +use itertools::Itertools; use lazy_static::lazy_static; +use tokio::sync::mpsc::Sender; use crate::{ object_io::{FileMetadata, FileType, ObjectSource}, diff --git a/src/daft-io/src/python.rs b/src/daft-io/src/python.rs index f573a1bc6f..484911b6b4 100644 --- a/src/daft-io/src/python.rs +++ b/src/daft-io/src/python.rs @@ -2,11 +2,12 @@ pub use common_io_config::python::{AzureConfig, GCSConfig, IOConfig}; pub use py::register_modules; mod py { - use crate::{get_io_client, get_runtime, parse_url, s3_like, stats::IOStatsContext}; use common_error::DaftResult; use futures::TryStreamExt; use pyo3::{prelude::*, types::PyDict}; + use crate::{get_io_client, get_runtime, parse_url, s3_like, stats::IOStatsContext}; + #[pyfunction] fn io_glob( py: Python, diff --git a/src/daft-io/src/s3_like.rs b/src/daft-io/src/s3_like.rs index e84d0e31fa..97c9641e58 100644 --- a/src/daft-io/src/s3_like.rs +++ b/src/daft-io/src/s3_like.rs @@ -1,46 +1,43 @@ -use async_trait::async_trait; -use aws_config::meta::credentials::CredentialsProviderChain; -use aws_config::retry::RetryMode; -use aws_config::timeout::TimeoutConfig; -use aws_sdk_s3::operation::put_object::PutObjectError; -use aws_smithy_async::rt::sleep::TokioSleep; -use futures::stream::BoxStream; -use reqwest::StatusCode; -use s3::operation::head_object::HeadObjectError; -use s3::operation::list_objects_v2::ListObjectsV2Error; -use tokio::sync::{OwnedSemaphorePermit, SemaphorePermit}; +use std::{collections::HashMap, io, ops::Range, string::FromUtf8Error, sync::Arc, time::Duration}; -use crate::object_io::{FileMetadata, FileType, LSResult}; -use crate::stats::IOStatsRef; -use crate::stream_utils::io_stats_on_bytestream; -use crate::{get_io_pool_num_threads, FileFormat, InvalidArgumentSnafu, SourceType}; -use aws_config::SdkConfig; -use aws_credential_types::cache::{ - CredentialsCache, ProvideCachedCredentials, SharedCredentialsCache, +use async_recursion::async_recursion; +use async_trait::async_trait; +use aws_config::{ + meta::credentials::CredentialsProviderChain, retry::RetryMode, timeout::TimeoutConfig, + SdkConfig, +}; +use aws_credential_types::{ + cache::{CredentialsCache, ProvideCachedCredentials, SharedCredentialsCache}, + provider::error::CredentialsError, }; -use aws_credential_types::provider::error::CredentialsError; +use aws_sdk_s3 as s3; +use aws_sdk_s3::{operation::put_object::PutObjectError, primitives::ByteStreamError}; use aws_sig_auth::signer::SigningRequirements; +use aws_smithy_async::rt::sleep::TokioSleep; use common_io_config::S3Config; -use futures::{StreamExt, TryStreamExt}; -use s3::client::customize::Response; -use s3::config::{Credentials, Region}; -use s3::error::{DisplayErrorContext, SdkError}; -use s3::operation::get_object::GetObjectError; +use futures::{stream::BoxStream, StreamExt, TryStreamExt}; +use reqwest::StatusCode; +use s3::{ + client::customize::Response, + config::{Credentials, Region}, + error::{DisplayErrorContext, SdkError}, + operation::{ + get_object::GetObjectError, head_object::HeadObjectError, + list_objects_v2::ListObjectsV2Error, + }, +}; use snafu::{ensure, IntoError, ResultExt, Snafu}; +use tokio::sync::{OwnedSemaphorePermit, SemaphorePermit}; use url::{ParseError, Position}; use super::object_io::{GetResult, ObjectSource}; -use async_recursion::async_recursion; -use aws_sdk_s3 as s3; -use aws_sdk_s3::primitives::ByteStreamError; - -use std::collections::HashMap; - -use std::io; -use std::ops::Range; -use std::string::FromUtf8Error; -use std::sync::Arc; -use std::time::Duration; +use crate::{ + get_io_pool_num_threads, + object_io::{FileMetadata, FileType, LSResult}, + stats::IOStatsRef, + stream_utils::io_stats_on_bytestream, + FileFormat, InvalidArgumentSnafu, SourceType, +}; const S3_DELIMITER: &str = "/"; const DEFAULT_GLOB_FANOUT_LIMIT: usize = 1024; @@ -336,8 +333,7 @@ fn handle_https_client_settings( http_connector, tls_connector.into(), )); - use aws_smithy_client::http_connector::ConnectorSettings; - use aws_smithy_client::hyper_ext; + use aws_smithy_client::{http_connector::ConnectorSettings, hyper_ext}; let smithy_client = hyper_ext::Adapter::builder() .connector_settings( ConnectorSettings::builder() @@ -1187,11 +1183,10 @@ impl ObjectSource for S3LikeSource { #[cfg(test)] mod tests { - use crate::object_io::ObjectSource; - use crate::Result; - use crate::S3LikeSource; use common_io_config::S3Config; + use crate::{object_io::ObjectSource, Result, S3LikeSource}; + #[tokio::test] async fn test_full_get_from_s3() -> Result<()> { let parquet_file_path = "s3://daft-public-data/test_fixtures/parquet_small/0dad4c3f-da0d-49db-90d8-98684571391b-0.parquet"; diff --git a/src/daft-io/src/stream_utils.rs b/src/daft-io/src/stream_utils.rs index 1460d3ebe3..4ed42811d3 100644 --- a/src/daft-io/src/stream_utils.rs +++ b/src/daft-io/src/stream_utils.rs @@ -1,9 +1,8 @@ use bytes::Bytes; +use futures::{stream::BoxStream, StreamExt}; use crate::stats::{IOStatsByteStreamContextHandle, IOStatsRef}; -use futures::{stream::BoxStream, StreamExt}; - pub(crate) fn io_stats_on_bytestream( mut s: impl futures::stream::Stream> + Unpin diff --git a/src/daft-json/src/decoding.rs b/src/daft-json/src/decoding.rs index 9eccd0304c..65f090f88b 100644 --- a/src/daft-json/src/decoding.rs +++ b/src/daft-json/src/decoding.rs @@ -1,14 +1,17 @@ -use crate::deserializer::Value as BorrowedValue; -use arrow2::array::{ - Array, MutableArray, MutableBooleanArray, MutableFixedSizeListArray, MutableListArray, - MutableNullArray, MutablePrimitiveArray, MutableStructArray, MutableUtf8Array, +use std::{borrow::Borrow, fmt::Write}; + +use arrow2::{ + array::{ + Array, MutableArray, MutableBooleanArray, MutableFixedSizeListArray, MutableListArray, + MutableNullArray, MutablePrimitiveArray, MutableStructArray, MutableUtf8Array, + }, + bitmap::MutableBitmap, + datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit}, + error::{Error, Result}, + offset::Offsets, + temporal_conversions, + types::{f16, NativeType, Offset}, }; -use arrow2::bitmap::MutableBitmap; -use arrow2::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit}; -use arrow2::error::{Error, Result}; -use arrow2::offset::Offsets; -use arrow2::temporal_conversions; -use arrow2::types::{f16, NativeType, Offset}; use chrono::{Datelike, Timelike}; use daft_decoding::deserialize::{ deserialize_datetime, deserialize_naive_date, deserialize_naive_datetime, @@ -17,8 +20,8 @@ use daft_decoding::deserialize::{ use indexmap::IndexMap; use num_traits::NumCast; use simd_json::StaticNode; -use std::borrow::Borrow; -use std::fmt::Write; + +use crate::deserializer::Value as BorrowedValue; const JSON_NULL_VALUE: BorrowedValue = BorrowedValue::Static(StaticNode::Null); /// Deserialize chunk of JSON records into a chunk of Arrow2 arrays. pub(crate) fn deserialize_records<'a, A: Borrow>>( diff --git a/src/daft-json/src/inference.rs b/src/daft-json/src/inference.rs index 79caea435e..76569aecc0 100644 --- a/src/daft-json/src/inference.rs +++ b/src/daft-json/src/inference.rs @@ -1,12 +1,14 @@ use std::{borrow::Borrow, collections::HashSet}; -use crate::deserializer::{Object, Value as BorrowedValue}; -use arrow2::datatypes::{DataType, Field, Metadata, Schema, TimeUnit}; -use arrow2::error::{Error, Result}; +use arrow2::{ + datatypes::{DataType, Field, Metadata, Schema, TimeUnit}, + error::{Error, Result}, +}; use indexmap::IndexMap; - use simd_json::StaticNode; +use crate::deserializer::{Object, Value as BorrowedValue}; + const ITEM_NAME: &str = "item"; /// Infer Arrow2 schema from JSON Value record. diff --git a/src/daft-json/src/local.rs b/src/daft-json/src/local.rs index 9777020a53..224c94f24f 100644 --- a/src/daft-json/src/local.rs +++ b/src/daft-json/src/local.rs @@ -2,12 +2,10 @@ use std::{borrow::Cow, collections::HashSet, num::NonZeroUsize, sync::Arc}; use common_error::DaftResult; use daft_core::{prelude::*, utils::arrow::cast_array_for_daft_if_needed}; - use daft_dsl::Expr; use daft_table::Table; use indexmap::IndexMap; use num_traits::Pow; - use rayon::{prelude::*, ThreadPoolBuilder}; use serde_json::value::RawValue; use snafu::ResultExt; @@ -440,11 +438,12 @@ fn next_line_position(input: &[u8]) -> Option { #[cfg(test)] mod tests { - use super::*; use arrow2::datatypes::{ DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, }; + use super::*; + #[test] fn test_infer_schema() { let json = r#" diff --git a/src/daft-json/src/read.rs b/src/daft-json/src/read.rs index ed9553179d..7396e6ca04 100644 --- a/src/daft-json/src/read.rs +++ b/src/daft-json/src/read.rs @@ -1,6 +1,7 @@ use std::{collections::HashMap, num::NonZeroUsize, sync::Arc}; use common_error::{DaftError, DaftResult}; +use daft_compression::CompressionCodec; use daft_core::{prelude::*, utils::arrow::cast_array_for_daft_if_needed}; use daft_dsl::optimization::get_required_columns; use daft_io::{get_runtime, parse_url, GetResult, IOClient, IOStatsRef, SourceType}; @@ -18,11 +19,10 @@ use tokio::{ }; use tokio_util::io::StreamReader; -use crate::{decoding::deserialize_records, local::read_json_local, ArrowSnafu, ChunkSnafu}; use crate::{ - schema::read_json_schema_single, JsonConvertOptions, JsonParseOptions, JsonReadOptions, + decoding::deserialize_records, local::read_json_local, schema::read_json_schema_single, + ArrowSnafu, ChunkSnafu, JsonConvertOptions, JsonParseOptions, JsonReadOptions, }; -use daft_compression::CompressionCodec; type TableChunkResult = super::Result>, super::JoinSnafu, super::Error>>; @@ -563,24 +563,21 @@ mod tests { use std::{collections::HashSet, io::BufRead, sync::Arc}; use common_error::DaftResult; - use daft_core::{ prelude::*, utils::arrow::{cast_array_for_daft_if_needed, cast_array_from_daft_if_needed}, }; - use daft_io::{IOClient, IOConfig}; use daft_table::Table; use indexmap::IndexMap; use rstest::rstest; + use super::read_json; use crate::{ decoding::deserialize_records, inference::{column_types_map_to_fields, infer_records_schema}, + JsonConvertOptions, JsonReadOptions, }; - use crate::{JsonConvertOptions, JsonReadOptions}; - - use super::read_json; fn check_equal_local_arrow2( path: &str, diff --git a/src/daft-json/src/schema.rs b/src/daft-json/src/schema.rs index 6d676a5603..5a8e37aa85 100644 --- a/src/daft-json/src/schema.rs +++ b/src/daft-json/src/schema.rs @@ -1,6 +1,7 @@ use std::{collections::HashSet, sync::Arc}; use common_error::DaftResult; +use daft_compression::CompressionCodec; use daft_core::prelude::Schema; use daft_io::{get_runtime, GetResult, IOClient, IOStatsRef}; use futures::{StreamExt, TryStreamExt}; @@ -16,7 +17,6 @@ use crate::{ inference::{column_types_map_to_fields, infer_records_schema}, ArrowSnafu, JsonParseOptions, StdIOSnafu, }; -use daft_compression::CompressionCodec; #[derive(Debug, Clone)] pub struct JsonReadStats { @@ -198,7 +198,6 @@ mod tests { use common_error::DaftResult; use daft_core::prelude::*; - use daft_io::{IOClient, IOConfig}; use rstest::rstest; diff --git a/src/daft-local-execution/src/intermediate_ops/aggregate.rs b/src/daft-local-execution/src/intermediate_ops/aggregate.rs index 39750d1401..13f93cb818 100644 --- a/src/daft-local-execution/src/intermediate_ops/aggregate.rs +++ b/src/daft-local-execution/src/intermediate_ops/aggregate.rs @@ -4,11 +4,10 @@ use common_error::DaftResult; use daft_dsl::ExprRef; use tracing::instrument; -use crate::pipeline::PipelineResultType; - use super::intermediate_op::{ IntermediateOperator, IntermediateOperatorResult, IntermediateOperatorState, }; +use crate::pipeline::PipelineResultType; pub struct AggregateOperator { agg_exprs: Vec, diff --git a/src/daft-local-execution/src/intermediate_ops/buffer.rs b/src/daft-local-execution/src/intermediate_ops/buffer.rs index e0301d90ec..67b17c5380 100644 --- a/src/daft-local-execution/src/intermediate_ops/buffer.rs +++ b/src/daft-local-execution/src/intermediate_ops/buffer.rs @@ -1,8 +1,7 @@ -use std::{collections::VecDeque, sync::Arc}; +use std::{cmp::Ordering::*, collections::VecDeque, sync::Arc}; use common_error::DaftResult; use daft_micropartition::MicroPartition; -use std::cmp::Ordering::*; pub struct OperatorBuffer { pub buffer: VecDeque>, diff --git a/src/daft-local-execution/src/intermediate_ops/filter.rs b/src/daft-local-execution/src/intermediate_ops/filter.rs index eeb02c4aff..da8dbcd19c 100644 --- a/src/daft-local-execution/src/intermediate_ops/filter.rs +++ b/src/daft-local-execution/src/intermediate_ops/filter.rs @@ -4,11 +4,10 @@ use common_error::DaftResult; use daft_dsl::ExprRef; use tracing::instrument; -use crate::pipeline::PipelineResultType; - use super::intermediate_op::{ IntermediateOperator, IntermediateOperatorResult, IntermediateOperatorState, }; +use crate::pipeline::PipelineResultType; pub struct FilterOperator { predicate: ExprRef, diff --git a/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs b/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs index e749a89cda..cd73ceff47 100644 --- a/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs +++ b/src/daft-local-execution/src/intermediate_ops/hash_join_probe.rs @@ -7,11 +7,10 @@ use daft_micropartition::MicroPartition; use daft_table::{GrowableTable, ProbeTable, Table}; use tracing::{info_span, instrument}; -use crate::pipeline::PipelineResultType; - use super::intermediate_op::{ IntermediateOperator, IntermediateOperatorResult, IntermediateOperatorState, }; +use crate::pipeline::PipelineResultType; enum HashJoinProbeState { Building, diff --git a/src/daft-local-execution/src/intermediate_ops/intermediate_op.rs b/src/daft-local-execution/src/intermediate_ops/intermediate_op.rs index d4d6ea4456..abb5c5388b 100644 --- a/src/daft-local-execution/src/intermediate_ops/intermediate_op.rs +++ b/src/daft-local-execution/src/intermediate_ops/intermediate_op.rs @@ -5,6 +5,7 @@ use common_error::DaftResult; use daft_micropartition::MicroPartition; use tracing::{info_span, instrument}; +use super::buffer::OperatorBuffer; use crate::{ channel::{create_channel, PipelineChannel, Receiver, Sender}, pipeline::{PipelineNode, PipelineResultType}, @@ -12,8 +13,6 @@ use crate::{ ExecutionRuntimeHandle, NUM_CPUS, }; -use super::buffer::OperatorBuffer; - pub trait IntermediateOperatorState: Send + Sync { fn as_any_mut(&mut self) -> &mut dyn std::any::Any; } diff --git a/src/daft-local-execution/src/intermediate_ops/project.rs b/src/daft-local-execution/src/intermediate_ops/project.rs index 6f4b57ba00..abd37b461f 100644 --- a/src/daft-local-execution/src/intermediate_ops/project.rs +++ b/src/daft-local-execution/src/intermediate_ops/project.rs @@ -4,11 +4,10 @@ use common_error::DaftResult; use daft_dsl::ExprRef; use tracing::instrument; -use crate::pipeline::PipelineResultType; - use super::intermediate_op::{ IntermediateOperator, IntermediateOperatorResult, IntermediateOperatorState, }; +use crate::pipeline::PipelineResultType; pub struct ProjectOperator { projection: Vec, diff --git a/src/daft-local-execution/src/lib.rs b/src/daft-local-execution/src/lib.rs index 5d1fb6d795..732f306768 100644 --- a/src/daft-local-execution/src/lib.rs +++ b/src/daft-local-execution/src/lib.rs @@ -9,8 +9,7 @@ mod sources; use common_error::{DaftError, DaftResult}; use lazy_static::lazy_static; pub use run::NativeExecutor; -use snafu::futures::TryFutureExt; -use snafu::Snafu; +use snafu::{futures::TryFutureExt, Snafu}; lazy_static! { pub static ref NUM_CPUS: usize = std::thread::available_parallelism().unwrap().get(); } diff --git a/src/daft-local-execution/src/pipeline.rs b/src/daft-local-execution/src/pipeline.rs index 8e39a1e704..e1f8633f94 100644 --- a/src/daft-local-execution/src/pipeline.rs +++ b/src/daft-local-execution/src/pipeline.rs @@ -1,5 +1,22 @@ use std::{collections::HashMap, sync::Arc}; +use common_display::{mermaid::MermaidDisplayVisitor, tree::TreeDisplay}; +use common_error::DaftResult; +use daft_core::{ + datatypes::Field, + prelude::{Schema, SchemaRef}, + utils::supertype, +}; +use daft_dsl::Expr; +use daft_micropartition::MicroPartition; +use daft_physical_plan::{ + Filter, HashAggregate, HashJoin, InMemoryScan, Limit, LocalPhysicalPlan, Project, Sort, + UnGroupedAggregate, +}; +use daft_plan::populate_aggregation_stages; +use daft_table::{ProbeTable, Table}; +use snafu::ResultExt; + use crate::{ channel::PipelineChannel, intermediate_ops::{ @@ -16,22 +33,6 @@ use crate::{ ExecutionRuntimeHandle, PipelineCreationSnafu, }; -use common_display::{mermaid::MermaidDisplayVisitor, tree::TreeDisplay}; -use common_error::DaftResult; -use daft_core::{datatypes::Field, utils::supertype}; - -use daft_core::prelude::{Schema, SchemaRef}; - -use daft_dsl::Expr; -use daft_micropartition::MicroPartition; -use daft_physical_plan::{ - Filter, HashAggregate, HashJoin, InMemoryScan, Limit, LocalPhysicalPlan, Project, Sort, - UnGroupedAggregate, -}; -use daft_plan::populate_aggregation_stages; -use daft_table::{ProbeTable, Table}; -use snafu::ResultExt; - #[derive(Clone)] pub enum PipelineResultType { Data(Arc), @@ -98,8 +99,9 @@ pub fn physical_plan_to_pipeline( physical_plan: &LocalPhysicalPlan, psets: &HashMap>>, ) -> crate::Result> { - use crate::sources::scan_task::ScanTaskSource; use daft_physical_plan::PhysicalScan; + + use crate::sources::scan_task::ScanTaskSource; let out: Box = match physical_plan { LocalPhysicalPlan::PhysicalScan(PhysicalScan { scan_tasks, .. }) => { let scan_task_source = ScanTaskSource::new(scan_tasks.clone()); diff --git a/src/daft-local-execution/src/run.rs b/src/daft-local-execution/src/run.rs index 182b298457..38d7c3e479 100644 --- a/src/daft-local-execution/src/run.rs +++ b/src/daft-local-execution/src/run.rs @@ -1,8 +1,3 @@ -use common_daft_config::DaftExecutionConfig; -use common_error::DaftResult; -use common_tracing::refresh_chrome_trace; -use daft_micropartition::MicroPartition; -use daft_physical_plan::{translate, LocalPhysicalPlan}; use std::{ collections::HashMap, fs::File, @@ -14,6 +9,11 @@ use std::{ time::{SystemTime, UNIX_EPOCH}, }; +use common_daft_config::DaftExecutionConfig; +use common_error::DaftResult; +use common_tracing::refresh_chrome_trace; +use daft_micropartition::MicroPartition; +use daft_physical_plan::{translate, LocalPhysicalPlan}; #[cfg(feature = "python")] use { common_daft_config::PyDaftExecutionConfig, diff --git a/src/daft-local-execution/src/runtime_stats.rs b/src/daft-local-execution/src/runtime_stats.rs index bcd9e4a1f5..7489a8fd36 100644 --- a/src/daft-local-execution/src/runtime_stats.rs +++ b/src/daft-local-execution/src/runtime_stats.rs @@ -34,8 +34,7 @@ impl RuntimeStats { emitted: bool, cpu_time: bool, ) -> Result<(), fmt::Error> { - use num_format::Locale; - use num_format::ToFormattedString; + use num_format::{Locale, ToFormattedString}; if received { writeln!( w, diff --git a/src/daft-local-execution/src/sinks/aggregate.rs b/src/daft-local-execution/src/sinks/aggregate.rs index 33163758ef..eae85a3f21 100644 --- a/src/daft-local-execution/src/sinks/aggregate.rs +++ b/src/daft-local-execution/src/sinks/aggregate.rs @@ -5,9 +5,8 @@ use daft_dsl::ExprRef; use daft_micropartition::MicroPartition; use tracing::instrument; -use crate::pipeline::PipelineResultType; - use super::blocking_sink::{BlockingSink, BlockingSinkStatus}; +use crate::pipeline::PipelineResultType; enum AggregateState { Accumulating(Vec>), diff --git a/src/daft-local-execution/src/sinks/hash_join_build.rs b/src/daft-local-execution/src/sinks/hash_join_build.rs index d5ca26f713..4782a0fbfd 100644 --- a/src/daft-local-execution/src/sinks/hash_join_build.rs +++ b/src/daft-local-execution/src/sinks/hash_join_build.rs @@ -1,13 +1,13 @@ use std::sync::Arc; -use crate::pipeline::PipelineResultType; use common_error::DaftResult; use daft_core::prelude::SchemaRef; use daft_dsl::ExprRef; use daft_micropartition::MicroPartition; +use daft_table::{ProbeTable, ProbeTableBuilder, Table}; use super::blocking_sink::{BlockingSink, BlockingSinkStatus}; -use daft_table::{ProbeTable, ProbeTableBuilder, Table}; +use crate::pipeline::PipelineResultType; enum ProbeTableState { Building { diff --git a/src/daft-local-execution/src/sources/in_memory.rs b/src/daft-local-execution/src/sources/in_memory.rs index 1a0719c3d0..1212dd13cb 100644 --- a/src/daft-local-execution/src/sources/in_memory.rs +++ b/src/daft-local-execution/src/sources/in_memory.rs @@ -1,12 +1,11 @@ use std::sync::Arc; -use crate::ExecutionRuntimeHandle; use daft_io::IOStatsRef; use daft_micropartition::MicroPartition; use tracing::instrument; use super::source::Source; -use crate::sources::source::SourceStream; +use crate::{sources::source::SourceStream, ExecutionRuntimeHandle}; pub struct InMemorySource { data: Vec>, diff --git a/src/daft-local-execution/src/sources/scan_task.rs b/src/daft-local-execution/src/sources/scan_task.rs index 0070e17ae2..f7374fa80a 100644 --- a/src/daft-local-execution/src/sources/scan_task.rs +++ b/src/daft-local-execution/src/sources/scan_task.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use common_error::DaftResult; use common_file_formats::{FileFormatConfig, ParquetSourceConfig}; use daft_csv::{CsvConvertOptions, CsvParseOptions, CsvReadOptions}; @@ -7,8 +9,8 @@ use daft_micropartition::MicroPartition; use daft_parquet::read::ParquetSchemaInferenceOptions; use daft_scan::{storage_config::StorageConfig, ChunkSpec, ScanTask}; use futures::{Stream, StreamExt}; -use std::sync::Arc; use tokio_stream::wrappers::ReceiverStream; +use tracing::instrument; use crate::{ channel::{create_channel, Sender}, @@ -16,8 +18,6 @@ use crate::{ ExecutionRuntimeHandle, }; -use tracing::instrument; - pub struct ScanTaskSource { scan_tasks: Vec>, } diff --git a/src/daft-micropartition/src/micropartition.rs b/src/daft-micropartition/src/micropartition.rs index 261124c099..cc8439583c 100644 --- a/src/daft-micropartition/src/micropartition.rs +++ b/src/daft-micropartition/src/micropartition.rs @@ -1,7 +1,9 @@ -use std::collections::{BTreeMap, HashMap, HashSet}; -use std::fmt::Display; -use std::sync::Arc; -use std::{ops::Deref, sync::Mutex}; +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + fmt::Display, + ops::Deref, + sync::{Arc, Mutex}, +}; use arrow2::io::parquet::read::schema::infer_schema_with_options; use common_error::DaftResult; @@ -9,25 +11,24 @@ use common_file_formats::{CsvSourceConfig, FileFormatConfig, ParquetSourceConfig use daft_core::prelude::*; use daft_csv::{CsvConvertOptions, CsvParseOptions, CsvReadOptions}; use daft_dsl::ExprRef; +use daft_io::{get_runtime, IOClient, IOConfig, IOStatsContext, IOStatsRef}; use daft_json::{JsonConvertOptions, JsonParseOptions, JsonReadOptions}; use daft_parquet::read::{ read_parquet_bulk, read_parquet_metadata_bulk, ParquetSchemaInferenceOptions, }; -use daft_scan::storage_config::{NativeStorageConfig, StorageConfig}; -use daft_scan::{ChunkSpec, DataSource, Pushdowns, ScanTask}; +use daft_scan::{ + storage_config::{NativeStorageConfig, StorageConfig}, + ChunkSpec, DataSource, Pushdowns, ScanTask, +}; +use daft_stats::{PartitionSpec, TableMetadata, TableStatistics}; use daft_table::Table; - -use crate::{DaftCSVSnafu, DaftCoreComputeSnafu}; use parquet2::metadata::FileMetaData; use snafu::ResultExt; - -use daft_io::{get_runtime, IOClient, IOConfig, IOStatsContext, IOStatsRef}; -use daft_stats::TableStatistics; -use daft_stats::{PartitionSpec, TableMetadata}; - #[cfg(feature = "python")] use {crate::PyIOSnafu, common_file_formats::DatabaseSourceConfig}; +use crate::{DaftCSVSnafu, DaftCoreComputeSnafu}; + #[derive(Debug)] pub(crate) enum TableState { Unloaded(Arc), diff --git a/src/daft-micropartition/src/ops/concat.rs b/src/daft-micropartition/src/ops/concat.rs index 8e23360d36..904e68324a 100644 --- a/src/daft-micropartition/src/ops/concat.rs +++ b/src/daft-micropartition/src/ops/concat.rs @@ -2,11 +2,10 @@ use std::sync::Mutex; use common_error::{DaftError, DaftResult}; use daft_io::IOStatsContext; +use daft_stats::TableMetadata; use crate::micropartition::{MicroPartition, TableState}; -use daft_stats::TableMetadata; - impl MicroPartition { pub fn concat(mps: &[&Self]) -> DaftResult { if mps.is_empty() { diff --git a/src/daft-micropartition/src/ops/eval_expressions.rs b/src/daft-micropartition/src/ops/eval_expressions.rs index 8479b560b7..7a9b8bed0e 100644 --- a/src/daft-micropartition/src/ops/eval_expressions.rs +++ b/src/daft-micropartition/src/ops/eval_expressions.rs @@ -4,12 +4,11 @@ use common_error::{DaftError, DaftResult}; use daft_core::prelude::Schema; use daft_dsl::ExprRef; use daft_io::IOStatsContext; +use daft_stats::{ColumnRangeStatistics, TableStatistics}; use snafu::ResultExt; use crate::{micropartition::MicroPartition, DaftCoreComputeSnafu}; -use daft_stats::{ColumnRangeStatistics, TableStatistics}; - fn infer_schema(exprs: &[ExprRef], schema: &Schema) -> DaftResult { let fields = exprs .iter() diff --git a/src/daft-micropartition/src/ops/filter.rs b/src/daft-micropartition/src/ops/filter.rs index ae75803252..a097192f30 100644 --- a/src/daft-micropartition/src/ops/filter.rs +++ b/src/daft-micropartition/src/ops/filter.rs @@ -1,12 +1,11 @@ use common_error::DaftResult; use daft_dsl::ExprRef; use daft_io::IOStatsContext; +use daft_stats::TruthValue; use snafu::ResultExt; use crate::{micropartition::MicroPartition, DaftCoreComputeSnafu}; -use daft_stats::TruthValue; - impl MicroPartition { pub fn filter(&self, predicate: &[ExprRef]) -> DaftResult { let io_stats = IOStatsContext::new("MicroPartition::filter"); diff --git a/src/daft-micropartition/src/ops/join.rs b/src/daft-micropartition/src/ops/join.rs index eda430c3fd..aef268d669 100644 --- a/src/daft-micropartition/src/ops/join.rs +++ b/src/daft-micropartition/src/ops/join.rs @@ -4,12 +4,11 @@ use common_error::DaftResult; use daft_core::{array::ops::DaftCompare, join::JoinType}; use daft_dsl::{join::infer_join_schema, ExprRef}; use daft_io::IOStatsContext; +use daft_stats::TruthValue; use daft_table::Table; use crate::micropartition::MicroPartition; -use daft_stats::TruthValue; - impl MicroPartition { fn join( &self, diff --git a/src/daft-micropartition/src/python.rs b/src/daft-micropartition/src/python.rs index 651b6c1556..03423d1a37 100644 --- a/src/daft-micropartition/src/python.rs +++ b/src/daft-micropartition/src/python.rs @@ -4,26 +4,22 @@ use std::{ }; use common_error::DaftResult; -use daft_core::prelude::*; - -use daft_core::python::PySeries; -use daft_core::python::{PySchema, PyTimeUnit}; - +use daft_core::{ + prelude::*, + python::{PySchema, PySeries, PyTimeUnit}, +}; use daft_csv::{CsvConvertOptions, CsvParseOptions, CsvReadOptions}; use daft_dsl::python::PyExpr; use daft_io::{python::IOConfig, IOStatsContext}; use daft_json::{JsonConvertOptions, JsonParseOptions, JsonReadOptions}; use daft_parquet::read::ParquetSchemaInferenceOptions; use daft_scan::{python::pylib::PyScanTask, storage_config::PyStorageConfig, ScanTask}; -use daft_stats::TableStatistics; +use daft_stats::{TableMetadata, TableStatistics}; use daft_table::python::PyTable; -use pyo3::{exceptions::PyValueError, prelude::*, types::PyBytes}; +use pyo3::{exceptions::PyValueError, prelude::*, types::PyBytes, PyTypeInfo}; use crate::micropartition::{MicroPartition, TableState}; -use daft_stats::TableMetadata; -use pyo3::PyTypeInfo; - #[pyclass(module = "daft.daft", frozen)] #[derive(Clone)] pub struct PyMicroPartition { diff --git a/src/daft-minhash/benches/minhash.rs b/src/daft-minhash/benches/minhash.rs index 5fe3f72688..6e51cdf850 100644 --- a/src/daft-minhash/benches/minhash.rs +++ b/src/daft-minhash/benches/minhash.rs @@ -2,8 +2,9 @@ extern crate test; -use daft_minhash::{load_simd, minhash}; use std::{iter::repeat_with, ops::Range}; + +use daft_minhash::{load_simd, minhash}; use test::Bencher; const N_TOKENS: usize = 10000; diff --git a/src/daft-parquet/src/file.rs b/src/daft-parquet/src/file.rs index d5bb1a38dd..3b84579b6d 100644 --- a/src/daft-parquet/src/file.rs +++ b/src/daft-parquet/src/file.rs @@ -3,10 +3,9 @@ use std::{ sync::Arc, }; -use arrow2::io::parquet::read::schema::infer_schema_with_options; +use arrow2::io::parquet::read::{column_iter_to_arrays, schema::infer_schema_with_options}; use common_error::DaftResult; use daft_core::{prelude::*, utils::arrow::cast_array_for_daft_if_needed}; - use daft_dsl::ExprRef; use daft_io::{IOClient, IOStatsRef}; use daft_stats::TruthValue; @@ -29,7 +28,6 @@ use crate::{ UnableToConvertSchemaToDaftSnafu, UnableToCreateParquetPageStreamSnafu, UnableToParseSchemaFromMetadataSnafu, UnableToRunExpressionOnStatsSnafu, }; -use arrow2::io::parquet::read::column_iter_to_arrays; pub(crate) struct ParquetReaderBuilder { pub uri: String, diff --git a/src/daft-parquet/src/metadata.rs b/src/daft-parquet/src/metadata.rs index 5034feda04..dd7c186145 100644 --- a/src/daft-parquet/src/metadata.rs +++ b/src/daft-parquet/src/metadata.rs @@ -4,10 +4,8 @@ use common_error::DaftResult; use daft_core::datatypes::Field; use daft_dsl::common_treenode::{Transformed, TreeNode, TreeNodeRecursion}; use daft_io::{IOClient, IOStatsRef}; - pub use parquet2::metadata::{FileMetaData, RowGroupMetaData}; -use parquet2::schema::types::ParquetType; -use parquet2::{metadata::RowGroupList, read::deserialize_metadata}; +use parquet2::{metadata::RowGroupList, read::deserialize_metadata, schema::types::ParquetType}; use snafu::ResultExt; use crate::{Error, JoinSnafu, UnableToParseMetadataSnafu}; diff --git a/src/daft-parquet/src/python.rs b/src/daft-parquet/src/python.rs index 66c400e9ec..930eb7e91b 100644 --- a/src/daft-parquet/src/python.rs +++ b/src/daft-parquet/src/python.rs @@ -1,14 +1,14 @@ use pyo3::prelude::*; pub mod pylib { + use std::{collections::BTreeMap, sync::Arc}; + use common_arrow_ffi::{field_to_py, to_py_array}; - use daft_core::python::PySeries; - use daft_core::python::{PySchema, PyTimeUnit}; + use daft_core::python::{PySchema, PySeries, PyTimeUnit}; use daft_dsl::python::PyExpr; use daft_io::{get_io_client, python::IOConfig, IOStatsContext}; use daft_table::python::PyTable; use pyo3::{pyfunction, types::PyModule, Bound, PyResult, Python}; - use std::{collections::BTreeMap, sync::Arc}; use crate::read::{ArrowChunk, ParquetSchemaInferenceOptions}; #[allow(clippy::too_many_arguments)] diff --git a/src/daft-parquet/src/read.rs b/src/daft-parquet/src/read.rs index e9d16e901c..38974e2b01 100644 --- a/src/daft-parquet/src/read.rs +++ b/src/daft-parquet/src/read.rs @@ -6,7 +6,6 @@ use std::{ use arrow2::{bitmap::Bitmap, io::parquet::read::schema::infer_schema_with_options}; use common_error::DaftResult; - use daft_core::prelude::*; use daft_dsl::{optimization::get_required_columns, ExprRef}; use daft_io::{get_runtime, parse_url, IOClient, IOStatsRef, SourceType}; @@ -18,10 +17,10 @@ use futures::{ }; use itertools::Itertools; use parquet2::metadata::FileMetaData; +use serde::{Deserialize, Serialize}; use snafu::ResultExt; use crate::{file::ParquetReaderBuilder, JoinSnafu}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Copy, Serialize, Deserialize)] pub struct ParquetSchemaInferenceOptions { @@ -1023,14 +1022,11 @@ mod tests { use std::sync::Arc; use common_error::DaftResult; - use daft_io::{IOClient, IOConfig}; use futures::StreamExt; use parquet2::metadata::FileMetaData; - use super::read_parquet; - use super::read_parquet_metadata; - use super::stream_parquet; + use super::{read_parquet, read_parquet_metadata, stream_parquet}; const PARQUET_FILE: &str = "s3://daft-public-data/test_fixtures/parquet-dev/mvp.parquet"; const PARQUET_FILE_LOCAL: &str = "tests/assets/parquet-data/mvp.parquet"; diff --git a/src/daft-parquet/src/statistics/column_range.rs b/src/daft-parquet/src/statistics/column_range.rs index aab3e92f89..6910eb7ad5 100644 --- a/src/daft-parquet/src/statistics/column_range.rs +++ b/src/daft-parquet/src/statistics/column_range.rs @@ -1,6 +1,5 @@ use arrow2::array::PrimitiveArray; use daft_core::prelude::*; - use daft_stats::ColumnRangeStatistics; use parquet2::{ schema::types::{PhysicalType, PrimitiveConvertedType, TimeUnit}, @@ -10,10 +9,10 @@ use parquet2::{ }; use snafu::{OptionExt, ResultExt}; -use super::{DaftStatsSnafu, MissingParquetColumnStatisticsSnafu, Wrap}; - -use super::utils::*; -use super::UnableToParseUtf8FromBinarySnafu; +use super::{ + utils::*, DaftStatsSnafu, MissingParquetColumnStatisticsSnafu, + UnableToParseUtf8FromBinarySnafu, Wrap, +}; impl TryFrom<&BooleanStatistics> for Wrap { type Error = super::Error; diff --git a/src/daft-parquet/src/statistics/table_stats.rs b/src/daft-parquet/src/statistics/table_stats.rs index ddc306c4ca..ac3ffa71dd 100644 --- a/src/daft-parquet/src/statistics/table_stats.rs +++ b/src/daft-parquet/src/statistics/table_stats.rs @@ -1,12 +1,11 @@ use common_error::DaftResult; use daft_core::prelude::Schema; use daft_stats::{ColumnRangeStatistics, TableStatistics}; +use indexmap::IndexMap; use snafu::ResultExt; use super::column_range::parquet_statistics_to_column_range_statistics; -use indexmap::IndexMap; - pub fn row_group_metadata_to_table_stats( metadata: &crate::metadata::RowGroupMetaData, schema: &Schema, diff --git a/src/daft-parquet/src/stream_reader.rs b/src/daft-parquet/src/stream_reader.rs index 16eed62a18..dd88834aaa 100644 --- a/src/daft-parquet/src/stream_reader.rs +++ b/src/daft-parquet/src/stream_reader.rs @@ -8,14 +8,13 @@ use std::{ use arrow2::io::parquet::read; use common_error::DaftResult; use daft_core::{prelude::*, utils::arrow::cast_array_for_daft_if_needed}; - use daft_dsl::ExprRef; use daft_io::IOStatsRef; use daft_table::Table; use futures::{stream::BoxStream, StreamExt}; use itertools::Itertools; use rayon::{ - iter::IntoParallelRefMutIterator, + iter::{IntoParallelRefMutIterator, ParallelIterator}, prelude::{IndexedParallelIterator, IntoParallelIterator, ParallelBridge}, }; use snafu::ResultExt; @@ -23,12 +22,10 @@ use snafu::ResultExt; use crate::{ file::{build_row_ranges, RowGroupRange}, read::{ArrowChunk, ArrowChunkIters, ParquetSchemaInferenceOptions}, + stream_reader::read::schema::infer_schema_with_options, UnableToConvertSchemaToDaftSnafu, }; -use crate::stream_reader::read::schema::infer_schema_with_options; -use rayon::iter::ParallelIterator; - fn prune_fields_from_schema( schema: arrow2::datatypes::Schema, columns: Option<&[String]>, diff --git a/src/daft-plan/src/builder.rs b/src/daft-plan/src/builder.rs index 10e6c2d5f6..2c6b599303 100644 --- a/src/daft-plan/src/builder.rs +++ b/src/daft-plan/src/builder.rs @@ -3,28 +3,15 @@ use std::{ sync::Arc, }; -use crate::{ - logical_ops, - logical_optimization::{Optimizer, OptimizerConfig}, - logical_plan::LogicalPlan, - partitioning::{ - HashRepartitionConfig, IntoPartitionsConfig, RandomShuffleConfig, RepartitionSpec, - }, - sink_info::{OutputFileInfo, SinkInfo}, - source_info::SourceInfo, - LogicalPlanRef, -}; use common_daft_config::DaftPlanningConfig; use common_display::mermaid::MermaidDisplayOptions; use common_error::DaftResult; use common_file_formats::FileFormat; use common_io_config::IOConfig; use daft_core::join::{JoinStrategy, JoinType}; -use daft_schema::schema::{Schema, SchemaRef}; - use daft_dsl::{col, ExprRef}; use daft_scan::{PhysicalScanInfo, Pushdowns, ScanOperatorRef}; - +use daft_schema::schema::{Schema, SchemaRef}; #[cfg(feature = "python")] use { crate::sink_info::{CatalogInfo, IcebergCatalogInfo}, @@ -36,6 +23,18 @@ use { pyo3::prelude::*, }; +use crate::{ + logical_ops, + logical_optimization::{Optimizer, OptimizerConfig}, + logical_plan::LogicalPlan, + partitioning::{ + HashRepartitionConfig, IntoPartitionsConfig, RandomShuffleConfig, RepartitionSpec, + }, + sink_info::{OutputFileInfo, SinkInfo}, + source_info::SourceInfo, + LogicalPlanRef, +}; + /// A logical plan builder, which simplifies constructing logical plans via /// a fluent interface. E.g., LogicalPlanBuilder::table_scan(..).project(..).filter(..).build(). /// diff --git a/src/daft-plan/src/display.rs b/src/daft-plan/src/display.rs index 3d79dd179a..a89276cc04 100644 --- a/src/daft-plan/src/display.rs +++ b/src/daft-plan/src/display.rs @@ -86,10 +86,9 @@ impl Display for crate::physical_plan::PhysicalPlan { #[cfg(test)] mod test { - use common_display::mermaid::{MermaidDisplay, MermaidDisplayOptions, SubgraphOptions}; - use std::sync::Arc; + use common_display::mermaid::{MermaidDisplay, MermaidDisplayOptions, SubgraphOptions}; use common_error::DaftResult; use daft_core::prelude::*; use daft_dsl::{ diff --git a/src/daft-plan/src/lib.rs b/src/daft-plan/src/lib.rs index c9be5a7aa9..50e309916b 100644 --- a/src/daft-plan/src/lib.rs +++ b/src/daft-plan/src/lib.rs @@ -28,13 +28,12 @@ pub use physical_planner::{ logical_to_physical, populate_aggregation_stages, AdaptivePlanner, MaterializedResults, QueryStageOutput, }; -pub use sink_info::{OutputFileInfo, SinkInfo}; -pub use source_info::{FileInfo, FileInfos, InMemoryInfo, SourceInfo}; - #[cfg(feature = "python")] use pyo3::prelude::*; #[cfg(feature = "python")] pub use sink_info::{DeltaLakeCatalogInfo, IcebergCatalogInfo, LanceCatalogInfo}; +pub use sink_info::{OutputFileInfo, SinkInfo}; +pub use source_info::{FileInfo, FileInfos, InMemoryInfo, SourceInfo}; #[cfg(feature = "python")] use { common_file_formats::{ diff --git a/src/daft-plan/src/logical_ops/agg.rs b/src/daft-plan/src/logical_ops/agg.rs index 0512a33347..1eddc14a5f 100644 --- a/src/daft-plan/src/logical_ops/agg.rs +++ b/src/daft-plan/src/logical_ops/agg.rs @@ -1,13 +1,14 @@ use std::sync::Arc; -use itertools::Itertools; -use snafu::ResultExt; - use daft_dsl::{resolve_aggexprs, resolve_exprs, AggExpr, ExprRef}; use daft_schema::schema::{Schema, SchemaRef}; +use itertools::Itertools; +use snafu::ResultExt; -use crate::logical_plan::{self, CreationSnafu}; -use crate::LogicalPlan; +use crate::{ + logical_plan::{self, CreationSnafu}, + LogicalPlan, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Aggregate { diff --git a/src/daft-plan/src/logical_ops/concat.rs b/src/daft-plan/src/logical_ops/concat.rs index 00e45aa3d7..39541e39de 100644 --- a/src/daft-plan/src/logical_ops/concat.rs +++ b/src/daft-plan/src/logical_ops/concat.rs @@ -3,9 +3,7 @@ use std::sync::Arc; use common_error::DaftError; use snafu::ResultExt; -use crate::logical_plan; -use crate::logical_plan::CreationSnafu; -use crate::LogicalPlan; +use crate::{logical_plan, logical_plan::CreationSnafu, LogicalPlan}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Concat { diff --git a/src/daft-plan/src/logical_ops/filter.rs b/src/daft-plan/src/logical_ops/filter.rs index 05ed15acf4..c5187fca4a 100644 --- a/src/daft-plan/src/logical_ops/filter.rs +++ b/src/daft-plan/src/logical_ops/filter.rs @@ -1,12 +1,14 @@ use std::sync::Arc; +use common_error::DaftError; use daft_core::prelude::*; use daft_dsl::{resolve_single_expr, ExprRef}; use snafu::ResultExt; -use crate::logical_plan::{CreationSnafu, Result}; -use crate::LogicalPlan; -use common_error::DaftError; +use crate::{ + logical_plan::{CreationSnafu, Result}, + LogicalPlan, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Filter { diff --git a/src/daft-plan/src/logical_ops/pivot.rs b/src/daft-plan/src/logical_ops/pivot.rs index 0809cc9023..cc223b707c 100644 --- a/src/daft-plan/src/logical_ops/pivot.rs +++ b/src/daft-plan/src/logical_ops/pivot.rs @@ -1,14 +1,15 @@ use std::sync::Arc; use daft_core::prelude::*; -use itertools::Itertools; -use snafu::ResultExt; - use daft_dsl::{resolve_exprs, resolve_single_aggexpr, resolve_single_expr, AggExpr, ExprRef}; use daft_schema::schema::{Schema, SchemaRef}; +use itertools::Itertools; +use snafu::ResultExt; -use crate::logical_plan::{self, CreationSnafu}; -use crate::LogicalPlan; +use crate::{ + logical_plan::{self, CreationSnafu}, + LogicalPlan, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Pivot { diff --git a/src/daft-plan/src/logical_ops/project.rs b/src/daft-plan/src/logical_ops/project.rs index 3c344c56aa..41101fcd17 100644 --- a/src/daft-plan/src/logical_ops/project.rs +++ b/src/daft-plan/src/logical_ops/project.rs @@ -7,8 +7,10 @@ use indexmap::{IndexMap, IndexSet}; use itertools::Itertools; use snafu::ResultExt; -use crate::logical_plan::{CreationSnafu, Result}; -use crate::LogicalPlan; +use crate::{ + logical_plan::{CreationSnafu, Result}, + LogicalPlan, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Project { @@ -456,7 +458,6 @@ fn replace_column_with_semantic_id_aggexpr( mod tests { use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::{binary_op, col, lit, Operator}; use crate::{ diff --git a/src/daft-plan/src/logical_ops/sample.rs b/src/daft-plan/src/logical_ops/sample.rs index 9151aea9a8..7b63c5b6ad 100644 --- a/src/daft-plan/src/logical_ops/sample.rs +++ b/src/daft-plan/src/logical_ops/sample.rs @@ -1,6 +1,9 @@ +use std::{ + hash::{Hash, Hasher}, + sync::Arc, +}; + use crate::LogicalPlan; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; #[derive(Clone, Debug, PartialEq)] pub struct Sample { diff --git a/src/daft-plan/src/logical_ops/sink.rs b/src/daft-plan/src/logical_ops/sink.rs index 8972954fa1..69f331fcb7 100644 --- a/src/daft-plan/src/logical_ops/sink.rs +++ b/src/daft-plan/src/logical_ops/sink.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::resolve_exprs; use crate::{sink_info::SinkInfo, LogicalPlan, OutputFileInfo}; diff --git a/src/daft-plan/src/logical_ops/sort.rs b/src/daft-plan/src/logical_ops/sort.rs index 133396119c..cab410da86 100644 --- a/src/daft-plan/src/logical_ops/sort.rs +++ b/src/daft-plan/src/logical_ops/sort.rs @@ -2,14 +2,11 @@ use std::sync::Arc; use common_error::DaftError; use daft_core::prelude::*; - use daft_dsl::{resolve_exprs, ExprRef}; use itertools::Itertools; use snafu::ResultExt; -use crate::logical_plan; -use crate::logical_plan::CreationSnafu; -use crate::LogicalPlan; +use crate::{logical_plan, logical_plan::CreationSnafu, LogicalPlan}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Sort { diff --git a/src/daft-plan/src/logical_ops/source.rs b/src/daft-plan/src/logical_ops/source.rs index 3d9d88ad89..70c2341fb4 100644 --- a/src/daft-plan/src/logical_ops/source.rs +++ b/src/daft-plan/src/logical_ops/source.rs @@ -3,10 +3,7 @@ use std::sync::Arc; use daft_scan::PhysicalScanInfo; use daft_schema::schema::SchemaRef; -use crate::source_info::SourceInfo; - -use crate::source_info::InMemoryInfo; -use crate::source_info::PlaceHolderInfo; +use crate::source_info::{InMemoryInfo, PlaceHolderInfo, SourceInfo}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Source { diff --git a/src/daft-plan/src/logical_ops/unpivot.rs b/src/daft-plan/src/logical_ops/unpivot.rs index ca20f5f22b..7f10f2ee0a 100644 --- a/src/daft-plan/src/logical_ops/unpivot.rs +++ b/src/daft-plan/src/logical_ops/unpivot.rs @@ -2,9 +2,7 @@ use std::sync::Arc; use common_error::DaftError; use daft_core::{prelude::*, utils::supertype::try_get_supertype}; - use daft_dsl::{resolve_exprs, ExprRef}; - use itertools::Itertools; use snafu::ResultExt; diff --git a/src/daft-plan/src/logical_optimization/logical_plan_tracker.rs b/src/daft-plan/src/logical_optimization/logical_plan_tracker.rs index aae8159c15..bcaf575de3 100644 --- a/src/daft-plan/src/logical_optimization/logical_plan_tracker.rs +++ b/src/daft-plan/src/logical_optimization/logical_plan_tracker.rs @@ -76,7 +76,6 @@ mod tests { use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::{col, lit}; use crate::{ diff --git a/src/daft-plan/src/logical_optimization/optimizer.rs b/src/daft-plan/src/logical_optimization/optimizer.rs index fd411d021c..5e065e9213 100644 --- a/src/daft-plan/src/logical_optimization/optimizer.rs +++ b/src/daft-plan/src/logical_optimization/optimizer.rs @@ -1,8 +1,7 @@ use std::{ops::ControlFlow, sync::Arc}; use common_error::DaftResult; - -use crate::LogicalPlan; +use common_treenode::Transformed; use super::{ logical_plan_tracker::LogicalPlanTracker, @@ -11,7 +10,7 @@ use super::{ SplitActorPoolProjects, }, }; -use common_treenode::Transformed; +use crate::LogicalPlan; /// Config for optimizer. #[derive(Debug)] @@ -231,9 +230,9 @@ mod tests { use common_error::DaftResult; use common_treenode::{Transformed, TreeNode}; use daft_core::prelude::*; - use daft_dsl::{col, lit}; + use super::{Optimizer, OptimizerConfig, RuleBatch, RuleExecutionStrategy}; use crate::{ logical_ops::{Filter, Project}, logical_optimization::rules::OptimizerRule, @@ -241,8 +240,6 @@ mod tests { LogicalPlan, }; - use super::{Optimizer, OptimizerConfig, RuleBatch, RuleExecutionStrategy}; - /// Test that the optimizer terminates early when the plan is not transformed /// by a rule (i.e. a fixed-point is reached). #[test] diff --git a/src/daft-plan/src/logical_optimization/rules/drop_repartition.rs b/src/daft-plan/src/logical_optimization/rules/drop_repartition.rs index 838623e30b..727ebec298 100644 --- a/src/daft-plan/src/logical_optimization/rules/drop_repartition.rs +++ b/src/daft-plan/src/logical_optimization/rules/drop_repartition.rs @@ -1,12 +1,10 @@ use std::sync::Arc; use common_error::DaftResult; - -use crate::LogicalPlan; +use common_treenode::{DynTreeNode, Transformed, TreeNode}; use super::OptimizerRule; - -use common_treenode::{DynTreeNode, Transformed, TreeNode}; +use crate::LogicalPlan; /// Optimization rules for dropping unnecessary Repartitions. /// @@ -46,11 +44,11 @@ impl OptimizerRule for DropRepartition { #[cfg(test)] mod tests { + use std::sync::Arc; + use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::col; - use std::sync::Arc; use crate::{ logical_optimization::{ diff --git a/src/daft-plan/src/logical_optimization/rules/push_down_filter.rs b/src/daft-plan/src/logical_optimization/rules/push_down_filter.rs index a8961b470d..14728dab25 100644 --- a/src/daft-plan/src/logical_optimization/rules/push_down_filter.rs +++ b/src/daft-plan/src/logical_optimization/rules/push_down_filter.rs @@ -4,6 +4,7 @@ use std::{ }; use common_error::DaftResult; +use common_treenode::{DynTreeNode, Transformed, TreeNode}; use daft_core::join::JoinType; use daft_dsl::{ col, @@ -14,15 +15,13 @@ use daft_dsl::{ }; use daft_scan::{rewrite_predicate_for_partitioning, PredicateGroups}; +use super::OptimizerRule; use crate::{ logical_ops::{Concat, Filter, Project, Source}, source_info::SourceInfo, LogicalPlan, }; -use super::OptimizerRule; -use common_treenode::{DynTreeNode, Transformed, TreeNode}; - /// Optimization rules for pushing Filters further into the logical plan. #[derive(Default, Debug)] pub struct PushDownFilter {} @@ -356,7 +355,6 @@ mod tests { use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::{col, lit}; use daft_scan::Pushdowns; use rstest::rstest; diff --git a/src/daft-plan/src/logical_optimization/rules/push_down_limit.rs b/src/daft-plan/src/logical_optimization/rules/push_down_limit.rs index 2359cbdcfa..b8a3a223bd 100644 --- a/src/daft-plan/src/logical_optimization/rules/push_down_limit.rs +++ b/src/daft-plan/src/logical_optimization/rules/push_down_limit.rs @@ -1,16 +1,15 @@ use std::sync::Arc; use common_error::DaftResult; +use common_treenode::{DynTreeNode, Transformed, TreeNode}; +use super::OptimizerRule; use crate::{ logical_ops::{Limit as LogicalLimit, Source}, source_info::SourceInfo, LogicalPlan, }; -use super::OptimizerRule; -use common_treenode::{DynTreeNode, Transformed, TreeNode}; - /// Optimization rules for pushing Limits further into the logical plan. #[derive(Default, Debug)] pub struct PushDownLimit {} @@ -121,16 +120,15 @@ impl PushDownLimit { #[cfg(test)] mod tests { + use std::sync::Arc; + use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::col; use daft_scan::Pushdowns; - use rstest::rstest; - use std::sync::Arc; - #[cfg(feature = "python")] use pyo3::Python; + use rstest::rstest; use crate::{ logical_optimization::{rules::PushDownLimit, test::assert_optimized_plan_with_rules_eq}, diff --git a/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs b/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs index a06f947746..b063823b9b 100644 --- a/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs +++ b/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs @@ -1,10 +1,8 @@ use std::{collections::HashMap, sync::Arc}; use common_error::DaftResult; - -use common_treenode::{Transformed, TreeNode}; +use common_treenode::{DynTreeNode, Transformed, TreeNode}; use daft_core::prelude::*; - use daft_dsl::{ col, has_stateful_udf, optimization::{get_required_columns, replace_columns_with_expressions, requires_computation}, @@ -13,15 +11,13 @@ use daft_dsl::{ use indexmap::IndexSet; use itertools::Itertools; +use super::OptimizerRule; use crate::{ logical_ops::{ActorPoolProject, Aggregate, Join, Pivot, Project, Source}, source_info::SourceInfo, LogicalPlan, LogicalPlanRef, }; -use super::OptimizerRule; -use common_treenode::DynTreeNode; - #[derive(Default, Debug)] pub struct PushDownProjection {} @@ -908,12 +904,16 @@ mod tests { /// Projection<-ActorPoolProject prunes columns from the ActorPoolProject #[test] fn test_projection_pushdown_into_actorpoolproject() -> DaftResult<()> { - use crate::logical_ops::ActorPoolProject; - use crate::logical_ops::Project; use common_resource_request::ResourceRequest; - use daft_dsl::functions::python::{PythonUDF, StatefulPythonUDF}; - use daft_dsl::functions::FunctionExpr; - use daft_dsl::Expr; + use daft_dsl::{ + functions::{ + python::{PythonUDF, StatefulPythonUDF}, + FunctionExpr, + }, + Expr, + }; + + use crate::logical_ops::{ActorPoolProject, Project}; let scan_op = dummy_scan_operator(vec![ Field::new("a", DataType::Int64), @@ -966,12 +966,16 @@ mod tests { /// Projection<-ActorPoolProject<-ActorPoolProject prunes columns from both ActorPoolProjects #[test] fn test_projection_pushdown_into_double_actorpoolproject() -> DaftResult<()> { - use crate::logical_ops::ActorPoolProject; - use crate::logical_ops::Project; use common_resource_request::ResourceRequest; - use daft_dsl::functions::python::{PythonUDF, StatefulPythonUDF}; - use daft_dsl::functions::FunctionExpr; - use daft_dsl::Expr; + use daft_dsl::{ + functions::{ + python::{PythonUDF, StatefulPythonUDF}, + FunctionExpr, + }, + Expr, + }; + + use crate::logical_ops::{ActorPoolProject, Project}; let scan_op = dummy_scan_operator(vec![ Field::new("a", DataType::Int64), @@ -1047,12 +1051,16 @@ mod tests { /// Projection<-ActorPoolProject prunes ActorPoolProject entirely if the stateful projection column is pruned #[test] fn test_projection_pushdown_into_actorpoolproject_completely_removed() -> DaftResult<()> { - use crate::logical_ops::ActorPoolProject; - use crate::logical_ops::Project; use common_resource_request::ResourceRequest; - use daft_dsl::functions::python::{PythonUDF, StatefulPythonUDF}; - use daft_dsl::functions::FunctionExpr; - use daft_dsl::Expr; + use daft_dsl::{ + functions::{ + python::{PythonUDF, StatefulPythonUDF}, + FunctionExpr, + }, + Expr, + }; + + use crate::logical_ops::{ActorPoolProject, Project}; let scan_op = dummy_scan_operator(vec![ Field::new("a", DataType::Int64), diff --git a/src/daft-plan/src/logical_optimization/rules/split_actor_pool_projects.rs b/src/daft-plan/src/logical_optimization/rules/split_actor_pool_projects.rs index 34e8d491ef..1d77502221 100644 --- a/src/daft-plan/src/logical_optimization/rules/split_actor_pool_projects.rs +++ b/src/daft-plan/src/logical_optimization/rules/split_actor_pool_projects.rs @@ -12,13 +12,12 @@ use daft_dsl::{ }; use itertools::Itertools; +use super::OptimizerRule; use crate::{ logical_ops::{ActorPoolProject, Project}, LogicalPlan, }; -use super::OptimizerRule; - #[derive(Default, Debug)] pub struct SplitActorPoolProjects {} @@ -514,12 +513,10 @@ fn has_stateful_udf(e: &ExprRef) -> bool { #[cfg(test)] mod tests { use std::sync::Arc; - use test_log::test; use common_error::DaftResult; use common_resource_request::ResourceRequest; use daft_core::prelude::*; - use daft_dsl::{ col, functions::{ @@ -528,7 +525,9 @@ mod tests { }, Expr, ExprRef, }; + use test_log::test; + use super::SplitActorPoolProjects; use crate::{ logical_ops::{ActorPoolProject, Project}, logical_optimization::{ @@ -538,8 +537,6 @@ mod tests { LogicalPlan, }; - use super::SplitActorPoolProjects; - /// Helper that creates an optimizer with the SplitExprByStatefulUDF rule registered, optimizes /// the provided plan with said optimizer, and compares the optimized plan with /// the provided expected plan. diff --git a/src/daft-plan/src/logical_optimization/test/mod.rs b/src/daft-plan/src/logical_optimization/test/mod.rs index a6540da0d5..75b53b2182 100644 --- a/src/daft-plan/src/logical_optimization/test/mod.rs +++ b/src/daft-plan/src/logical_optimization/test/mod.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use common_error::DaftResult; +use super::optimizer::OptimizerRuleInBatch; use crate::{ logical_optimization::{ optimizer::{RuleBatch, RuleExecutionStrategy}, @@ -10,8 +11,6 @@ use crate::{ LogicalPlan, }; -use super::optimizer::OptimizerRuleInBatch; - /// Helper that creates an optimizer with the provided rules registered, optimizes /// the provided plan with said optimizer, and compares the optimized plan with /// the provided expected plan. diff --git a/src/daft-plan/src/logical_plan.rs b/src/daft-plan/src/logical_plan.rs index e59e8a03a2..2cb2ca4834 100644 --- a/src/daft-plan/src/logical_plan.rs +++ b/src/daft-plan/src/logical_plan.rs @@ -1,13 +1,12 @@ use std::{num::NonZeroUsize, sync::Arc}; +use common_display::ascii::AsciiTreeDisplay; use common_error::DaftError; use daft_dsl::optimization::get_required_columns; use daft_schema::schema::SchemaRef; use indexmap::IndexSet; use snafu::Snafu; -use common_display::ascii::AsciiTreeDisplay; - pub use crate::logical_ops::*; /// Logical plan for a Daft query. diff --git a/src/daft-plan/src/physical_ops/agg.rs b/src/daft-plan/src/physical_ops/agg.rs index 84ccec7a75..4f7c65e974 100644 --- a/src/daft-plan/src/physical_ops/agg.rs +++ b/src/daft-plan/src/physical_ops/agg.rs @@ -1,8 +1,8 @@ use daft_dsl::{AggExpr, ExprRef}; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::{impl_default_tree_display, physical_plan::PhysicalPlanRef}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Aggregate { diff --git a/src/daft-plan/src/physical_ops/broadcast_join.rs b/src/daft-plan/src/physical_ops/broadcast_join.rs index d6ccc20982..b45ce7c4fa 100644 --- a/src/daft-plan/src/physical_ops/broadcast_join.rs +++ b/src/daft-plan/src/physical_ops/broadcast_join.rs @@ -1,10 +1,10 @@ use common_display::tree::TreeDisplay; +use daft_core::join::JoinType; use daft_dsl::ExprRef; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use daft_core::join::JoinType; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct BroadcastJoin { diff --git a/src/daft-plan/src/physical_ops/coalesce.rs b/src/daft-plan/src/physical_ops/coalesce.rs index 8fdcd4791e..3cf2a062d0 100644 --- a/src/daft-plan/src/physical_ops/coalesce.rs +++ b/src/daft-plan/src/physical_ops/coalesce.rs @@ -1,6 +1,7 @@ -use crate::{impl_default_tree_display, physical_plan::PhysicalPlanRef}; use serde::{Deserialize, Serialize}; +use crate::{impl_default_tree_display, physical_plan::PhysicalPlanRef}; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Coalesce { // Upstream node. diff --git a/src/daft-plan/src/physical_ops/concat.rs b/src/daft-plan/src/physical_ops/concat.rs index 91ed50700f..ce77ced574 100644 --- a/src/daft-plan/src/physical_ops/concat.rs +++ b/src/daft-plan/src/physical_ops/concat.rs @@ -1,6 +1,7 @@ -use crate::{impl_default_tree_display, physical_plan::PhysicalPlanRef}; use serde::{Deserialize, Serialize}; +use crate::{impl_default_tree_display, physical_plan::PhysicalPlanRef}; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Concat { // Upstream node. diff --git a/src/daft-plan/src/physical_ops/csv.rs b/src/daft-plan/src/physical_ops/csv.rs index a1859d9be0..c073375253 100644 --- a/src/daft-plan/src/physical_ops/csv.rs +++ b/src/daft-plan/src/physical_ops/csv.rs @@ -1,7 +1,7 @@ use daft_schema::schema::SchemaRef; +use serde::{Deserialize, Serialize}; use crate::{physical_plan::PhysicalPlanRef, sink_info::OutputFileInfo}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct TabularWriteCsv { diff --git a/src/daft-plan/src/physical_ops/deltalake_write.rs b/src/daft-plan/src/physical_ops/deltalake_write.rs index 60844806c7..73af6650a9 100644 --- a/src/daft-plan/src/physical_ops/deltalake_write.rs +++ b/src/daft-plan/src/physical_ops/deltalake_write.rs @@ -1,7 +1,7 @@ use daft_schema::schema::SchemaRef; +use serde::{Deserialize, Serialize}; use crate::{physical_plan::PhysicalPlanRef, sink_info::DeltaLakeCatalogInfo}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct DeltaLakeWrite { diff --git a/src/daft-plan/src/physical_ops/empty_scan.rs b/src/daft-plan/src/physical_ops/empty_scan.rs index 191b7f3b2c..63097b33b9 100644 --- a/src/daft-plan/src/physical_ops/empty_scan.rs +++ b/src/daft-plan/src/physical_ops/empty_scan.rs @@ -1,8 +1,10 @@ -use crate::ClusteringSpec; +use std::sync::Arc; + use common_display::tree::TreeDisplay; use daft_schema::schema::SchemaRef; use serde::{Deserialize, Serialize}; -use std::sync::Arc; + +use crate::ClusteringSpec; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct EmptyScan { diff --git a/src/daft-plan/src/physical_ops/explode.rs b/src/daft-plan/src/physical_ops/explode.rs index ea08e51769..d16e28f04a 100644 --- a/src/daft-plan/src/physical_ops/explode.rs +++ b/src/daft-plan/src/physical_ops/explode.rs @@ -3,6 +3,7 @@ use std::{collections::HashSet, sync::Arc}; use common_error::DaftResult; use daft_dsl::{optimization::get_required_columns, ExprRef}; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::{ impl_default_tree_display, @@ -10,7 +11,6 @@ use crate::{ physical_plan::PhysicalPlanRef, ClusteringSpec, }; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Explode { diff --git a/src/daft-plan/src/physical_ops/fanout.rs b/src/daft-plan/src/physical_ops/fanout.rs index 4a21daca52..794aaa9373 100644 --- a/src/daft-plan/src/physical_ops/fanout.rs +++ b/src/daft-plan/src/physical_ops/fanout.rs @@ -1,8 +1,8 @@ use daft_dsl::ExprRef; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct FanoutRandom { diff --git a/src/daft-plan/src/physical_ops/filter.rs b/src/daft-plan/src/physical_ops/filter.rs index d42a793f9a..fe2bfade39 100644 --- a/src/daft-plan/src/physical_ops/filter.rs +++ b/src/daft-plan/src/physical_ops/filter.rs @@ -1,7 +1,7 @@ use daft_dsl::ExprRef; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Filter { diff --git a/src/daft-plan/src/physical_ops/flatten.rs b/src/daft-plan/src/physical_ops/flatten.rs index 902ff9804e..41ac967026 100644 --- a/src/daft-plan/src/physical_ops/flatten.rs +++ b/src/daft-plan/src/physical_ops/flatten.rs @@ -1,6 +1,7 @@ -use crate::physical_plan::PhysicalPlanRef; use serde::{Deserialize, Serialize}; +use crate::physical_plan::PhysicalPlanRef; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Flatten { // Upstream node. diff --git a/src/daft-plan/src/physical_ops/hash_join.rs b/src/daft-plan/src/physical_ops/hash_join.rs index 97d047b216..5d1895c1d8 100644 --- a/src/daft-plan/src/physical_ops/hash_join.rs +++ b/src/daft-plan/src/physical_ops/hash_join.rs @@ -1,10 +1,10 @@ use common_display::tree::TreeDisplay; +use daft_core::join::JoinType; use daft_dsl::ExprRef; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use daft_core::join::JoinType; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct HashJoin { diff --git a/src/daft-plan/src/physical_ops/iceberg_write.rs b/src/daft-plan/src/physical_ops/iceberg_write.rs index d2c6086cd1..c5959055c2 100644 --- a/src/daft-plan/src/physical_ops/iceberg_write.rs +++ b/src/daft-plan/src/physical_ops/iceberg_write.rs @@ -1,7 +1,7 @@ use daft_schema::schema::SchemaRef; +use serde::{Deserialize, Serialize}; use crate::{physical_plan::PhysicalPlanRef, sink_info::IcebergCatalogInfo}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] diff --git a/src/daft-plan/src/physical_ops/in_memory.rs b/src/daft-plan/src/physical_ops/in_memory.rs index c4fc7d0bd4..56f52533c4 100644 --- a/src/daft-plan/src/physical_ops/in_memory.rs +++ b/src/daft-plan/src/physical_ops/in_memory.rs @@ -1,8 +1,10 @@ -use crate::{source_info::InMemoryInfo, ClusteringSpec}; +use std::sync::Arc; + use common_display::{tree::TreeDisplay, DisplayLevel}; use daft_schema::schema::SchemaRef; use serde::{Deserialize, Serialize}; -use std::sync::Arc; + +use crate::{source_info::InMemoryInfo, ClusteringSpec}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct InMemoryScan { diff --git a/src/daft-plan/src/physical_ops/json.rs b/src/daft-plan/src/physical_ops/json.rs index 299bf17108..a55bf2901c 100644 --- a/src/daft-plan/src/physical_ops/json.rs +++ b/src/daft-plan/src/physical_ops/json.rs @@ -1,7 +1,7 @@ use daft_schema::schema::SchemaRef; +use serde::{Deserialize, Serialize}; use crate::{physical_plan::PhysicalPlanRef, sink_info::OutputFileInfo}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct TabularWriteJson { diff --git a/src/daft-plan/src/physical_ops/lance_write.rs b/src/daft-plan/src/physical_ops/lance_write.rs index 0b88130ede..951090df20 100644 --- a/src/daft-plan/src/physical_ops/lance_write.rs +++ b/src/daft-plan/src/physical_ops/lance_write.rs @@ -1,7 +1,7 @@ use daft_schema::schema::SchemaRef; +use serde::{Deserialize, Serialize}; use crate::{physical_plan::PhysicalPlanRef, sink_info::LanceCatalogInfo}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct LanceWrite { diff --git a/src/daft-plan/src/physical_ops/limit.rs b/src/daft-plan/src/physical_ops/limit.rs index 07778aaa9d..7373b408c6 100644 --- a/src/daft-plan/src/physical_ops/limit.rs +++ b/src/daft-plan/src/physical_ops/limit.rs @@ -1,6 +1,7 @@ -use crate::physical_plan::PhysicalPlanRef; use serde::{Deserialize, Serialize}; +use crate::physical_plan::PhysicalPlanRef; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Limit { // Upstream node. diff --git a/src/daft-plan/src/physical_ops/monotonically_increasing_id.rs b/src/daft-plan/src/physical_ops/monotonically_increasing_id.rs index 906c222ad5..d2aa7267c5 100644 --- a/src/daft-plan/src/physical_ops/monotonically_increasing_id.rs +++ b/src/daft-plan/src/physical_ops/monotonically_increasing_id.rs @@ -1,8 +1,9 @@ use std::sync::Arc; -use crate::physical_plan::PhysicalPlan; use serde::{Deserialize, Serialize}; +use crate::physical_plan::PhysicalPlan; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct MonotonicallyIncreasingId { pub input: Arc, diff --git a/src/daft-plan/src/physical_ops/parquet.rs b/src/daft-plan/src/physical_ops/parquet.rs index bf9629cea5..4f3c502903 100644 --- a/src/daft-plan/src/physical_ops/parquet.rs +++ b/src/daft-plan/src/physical_ops/parquet.rs @@ -1,7 +1,7 @@ use daft_schema::schema::SchemaRef; +use serde::{Deserialize, Serialize}; use crate::{physical_plan::PhysicalPlanRef, sink_info::OutputFileInfo}; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct TabularWriteParquet { diff --git a/src/daft-plan/src/physical_ops/pivot.rs b/src/daft-plan/src/physical_ops/pivot.rs index f2e0c9ae85..c3b290bee3 100644 --- a/src/daft-plan/src/physical_ops/pivot.rs +++ b/src/daft-plan/src/physical_ops/pivot.rs @@ -1,8 +1,8 @@ use daft_dsl::ExprRef; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Pivot { diff --git a/src/daft-plan/src/physical_ops/project.rs b/src/daft-plan/src/physical_ops/project.rs index 82fb26dae8..5080d36702 100644 --- a/src/daft-plan/src/physical_ops/project.rs +++ b/src/daft-plan/src/physical_ops/project.rs @@ -4,11 +4,11 @@ use common_error::DaftResult; use common_resource_request::ResourceRequest; use daft_dsl::{functions::python::get_resource_request, ExprRef}; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::{ partitioning::translate_clustering_spec, physical_plan::PhysicalPlanRef, ClusteringSpec, }; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Project { @@ -74,7 +74,6 @@ mod tests { use common_daft_config::DaftExecutionConfig; use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::{col, lit, ExprRef}; use rstest::rstest; diff --git a/src/daft-plan/src/physical_ops/reduce.rs b/src/daft-plan/src/physical_ops/reduce.rs index 905ec77e78..997499646b 100644 --- a/src/daft-plan/src/physical_ops/reduce.rs +++ b/src/daft-plan/src/physical_ops/reduce.rs @@ -1,6 +1,7 @@ -use crate::physical_plan::PhysicalPlanRef; use serde::{Deserialize, Serialize}; +use crate::physical_plan::PhysicalPlanRef; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct ReduceMerge { // Upstream node. diff --git a/src/daft-plan/src/physical_ops/sample.rs b/src/daft-plan/src/physical_ops/sample.rs index d2f7880f96..d859071419 100644 --- a/src/daft-plan/src/physical_ops/sample.rs +++ b/src/daft-plan/src/physical_ops/sample.rs @@ -1,6 +1,7 @@ -use crate::physical_plan::PhysicalPlanRef; use serde::{Deserialize, Serialize}; +use crate::physical_plan::PhysicalPlanRef; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Sample { pub input: PhysicalPlanRef, diff --git a/src/daft-plan/src/physical_ops/scan.rs b/src/daft-plan/src/physical_ops/scan.rs index efc54c64a1..d99fc9297b 100644 --- a/src/daft-plan/src/physical_ops/scan.rs +++ b/src/daft-plan/src/physical_ops/scan.rs @@ -1,10 +1,11 @@ +use std::sync::Arc; + use common_display::{tree::TreeDisplay, DisplayAs, DisplayLevel}; use common_file_formats::FileFormatConfig; use daft_scan::ScanTask; -use std::sync::Arc; +use serde::{Deserialize, Serialize}; use crate::ClusteringSpec; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct TabularScan { diff --git a/src/daft-plan/src/physical_ops/sort.rs b/src/daft-plan/src/physical_ops/sort.rs index bebb756eb7..4d6df2a97f 100644 --- a/src/daft-plan/src/physical_ops/sort.rs +++ b/src/daft-plan/src/physical_ops/sort.rs @@ -1,8 +1,8 @@ use daft_dsl::ExprRef; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Sort { diff --git a/src/daft-plan/src/physical_ops/sort_merge_join.rs b/src/daft-plan/src/physical_ops/sort_merge_join.rs index 5f5ac91a45..32cad88c31 100644 --- a/src/daft-plan/src/physical_ops/sort_merge_join.rs +++ b/src/daft-plan/src/physical_ops/sort_merge_join.rs @@ -1,10 +1,10 @@ use common_display::tree::TreeDisplay; +use daft_core::join::JoinType; use daft_dsl::ExprRef; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::physical_plan::PhysicalPlanRef; -use daft_core::join::JoinType; -use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct SortMergeJoin { diff --git a/src/daft-plan/src/physical_ops/split.rs b/src/daft-plan/src/physical_ops/split.rs index 50591840d4..57599ad459 100644 --- a/src/daft-plan/src/physical_ops/split.rs +++ b/src/daft-plan/src/physical_ops/split.rs @@ -1,6 +1,7 @@ -use crate::physical_plan::PhysicalPlanRef; use serde::{Deserialize, Serialize}; +use crate::physical_plan::PhysicalPlanRef; + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Split { // Upstream node. diff --git a/src/daft-plan/src/physical_ops/unpivot.rs b/src/daft-plan/src/physical_ops/unpivot.rs index a3e4b8fa27..f9723e1851 100644 --- a/src/daft-plan/src/physical_ops/unpivot.rs +++ b/src/daft-plan/src/physical_ops/unpivot.rs @@ -1,9 +1,8 @@ use std::sync::Arc; use daft_dsl::ExprRef; -use serde::{Deserialize, Serialize}; - use itertools::Itertools; +use serde::{Deserialize, Serialize}; use crate::{ partitioning::translate_clustering_spec, physical_plan::PhysicalPlanRef, ClusteringSpec, diff --git a/src/daft-plan/src/physical_optimization/optimizer.rs b/src/daft-plan/src/physical_optimization/optimizer.rs index e6e67a057d..51e2b7ade3 100644 --- a/src/daft-plan/src/physical_optimization/optimizer.rs +++ b/src/daft-plan/src/physical_optimization/optimizer.rs @@ -1,11 +1,10 @@ use common_error::DaftResult; -use crate::PhysicalPlanRef; - use super::rules::{ drop_repartition::DropRepartitionPhysical, reorder_partition_keys::ReorderPartitionKeys, PhysicalOptimizerRuleBatch, PhysicalRuleExecutionStrategy, }; +use crate::PhysicalPlanRef; pub struct PhysicalOptimizerConfig { // The upper bound on the number of passes a rule batch can run. @@ -75,6 +74,7 @@ mod tests { use common_treenode::Transformed; use daft_core::prelude::*; + use super::{PhysicalOptimizer, PhysicalOptimizerRuleBatch}; use crate::{ partitioning::UnknownClusteringConfig, physical_ops::{EmptyScan, Limit}, @@ -82,8 +82,6 @@ mod tests { ClusteringSpec, PhysicalPlan, PhysicalPlanRef, }; - use super::{PhysicalOptimizer, PhysicalOptimizerRuleBatch}; - fn create_dummy_plan(schema: SchemaRef, num_partitions: usize) -> PhysicalPlanRef { PhysicalPlan::EmptyScan(EmptyScan::new( schema, diff --git a/src/daft-plan/src/physical_optimization/plan_context.rs b/src/daft-plan/src/physical_optimization/plan_context.rs index 17bb03b894..95f3126c08 100644 --- a/src/daft-plan/src/physical_optimization/plan_context.rs +++ b/src/daft-plan/src/physical_optimization/plan_context.rs @@ -1,8 +1,7 @@ use common_error::DaftResult; -use common_treenode::ConcreteTreeNode; +use common_treenode::{ConcreteTreeNode, DynTreeNode}; use crate::PhysicalPlanRef; -use common_treenode::DynTreeNode; // This struct allows providing context or state to go along // with visiting TreeNodes. pub(super) struct PlanContext { diff --git a/src/daft-plan/src/physical_optimization/rules/drop_repartition.rs b/src/daft-plan/src/physical_optimization/rules/drop_repartition.rs index 40e3d7c504..a0827d11f8 100644 --- a/src/daft-plan/src/physical_optimization/rules/drop_repartition.rs +++ b/src/daft-plan/src/physical_optimization/rules/drop_repartition.rs @@ -1,11 +1,10 @@ use common_error::DaftResult; -use common_treenode::{Transformed, TreeNode}; +use common_treenode::{DynTreeNode, Transformed, TreeNode}; use crate::{ physical_ops::FanoutByHash, physical_optimization::rules::PhysicalOptimizerRule, ClusteringSpec, PhysicalPlan, PhysicalPlanRef, }; -use common_treenode::DynTreeNode; pub struct DropRepartitionPhysical {} // if we are repartitioning but the child already has the correct spec, then don't repartition @@ -57,6 +56,7 @@ mod tests { use daft_core::prelude::*; use daft_dsl::{col, ExprRef}; + use super::DropRepartitionPhysical; use crate::{ partitioning::UnknownClusteringConfig, physical_ops::{EmptyScan, FanoutByHash, ReduceMerge}, @@ -64,8 +64,6 @@ mod tests { ClusteringSpec, PhysicalPlan, PhysicalPlanRef, }; - use super::DropRepartitionPhysical; - fn create_dummy_plan(schema: SchemaRef, num_partitions: usize) -> PhysicalPlanRef { PhysicalPlan::EmptyScan(EmptyScan::new( schema, diff --git a/src/daft-plan/src/physical_optimization/rules/reorder_partition_keys.rs b/src/daft-plan/src/physical_optimization/rules/reorder_partition_keys.rs index 9a481ce57f..c7f706abb0 100644 --- a/src/daft-plan/src/physical_optimization/rules/reorder_partition_keys.rs +++ b/src/daft-plan/src/physical_optimization/rules/reorder_partition_keys.rs @@ -177,8 +177,8 @@ mod tests { use crate::{ partitioning::UnknownClusteringConfig, physical_ops::{EmptyScan, FanoutByHash, HashJoin, ReduceMerge}, - physical_optimization::{ - rules::reorder_partition_keys::ReorderPartitionKeys, rules::PhysicalOptimizerRule, + physical_optimization::rules::{ + reorder_partition_keys::ReorderPartitionKeys, PhysicalOptimizerRule, }, ClusteringSpec, PhysicalPlan, PhysicalPlanRef, }; diff --git a/src/daft-plan/src/physical_plan.rs b/src/daft-plan/src/physical_plan.rs index 20a32e7eeb..6302612e3d 100644 --- a/src/daft-plan/src/physical_plan.rs +++ b/src/daft-plan/src/physical_plan.rs @@ -1,6 +1,7 @@ +use std::{cmp::max, ops::Add, sync::Arc}; + use common_display::ascii::AsciiTreeDisplay; use serde::{Deserialize, Serialize}; -use std::{cmp::max, ops::Add, sync::Arc}; use crate::{ partitioning::{ diff --git a/src/daft-plan/src/physical_planner/mod.rs b/src/daft-plan/src/physical_planner/mod.rs index 5012034c74..9813a73be8 100644 --- a/src/daft-plan/src/physical_planner/mod.rs +++ b/src/daft-plan/src/physical_planner/mod.rs @@ -2,12 +2,12 @@ use std::sync::Arc; use common_daft_config::DaftExecutionConfig; use common_error::DaftResult; - -use crate::LogicalPlan; -use crate::{physical_optimization::optimizer::PhysicalOptimizer, physical_plan::PhysicalPlanRef}; - -use crate::physical_planner::planner::PhysicalPlanTranslator; use common_treenode::TreeNode; + +use crate::{ + physical_optimization::optimizer::PhysicalOptimizer, physical_plan::PhysicalPlanRef, + physical_planner::planner::PhysicalPlanTranslator, LogicalPlan, +}; mod planner; pub use planner::{AdaptivePlanner, MaterializedResults, QueryStageOutput}; mod translate; diff --git a/src/daft-plan/src/physical_planner/planner.rs b/src/daft-plan/src/physical_planner/planner.rs index 16ca8328a2..685f8dd028 100644 --- a/src/daft-plan/src/physical_planner/planner.rs +++ b/src/daft-plan/src/physical_planner/planner.rs @@ -2,20 +2,20 @@ use std::sync::Arc; use common_daft_config::DaftExecutionConfig; use common_error::DaftResult; -use common_treenode::{Transformed, TreeNode, TreeNodeRewriter, TreeNodeVisitor}; +use common_treenode::{ + DynTreeNode, Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, +}; use serde::{Deserialize, Serialize}; -use crate::logical_ops::Source; -use crate::logical_optimization::Optimizer; -use crate::logical_plan::LogicalPlan; - -use crate::physical_plan::{PhysicalPlan, PhysicalPlanRef}; -use crate::source_info::{InMemoryInfo, PlaceHolderInfo, SourceInfo}; -use crate::LogicalPlanRef; - -use common_treenode::{DynTreeNode, TreeNodeRecursion}; - use super::translate::translate_single_logical_node; +use crate::{ + logical_ops::Source, + logical_optimization::Optimizer, + logical_plan::LogicalPlan, + physical_plan::{PhysicalPlan, PhysicalPlanRef}, + source_info::{InMemoryInfo, PlaceHolderInfo, SourceInfo}, + LogicalPlanRef, +}; pub(super) struct PhysicalPlanTranslator { pub physical_children: Vec>, pub cfg: Arc, diff --git a/src/daft-plan/src/physical_planner/translate.rs b/src/daft-plan/src/physical_planner/translate.rs index 20323d0b9a..639c571871 100644 --- a/src/daft-plan/src/physical_planner/translate.rs +++ b/src/daft-plan/src/physical_planner/translate.rs @@ -1,36 +1,34 @@ -use std::cmp::Ordering; -use std::sync::Arc; use std::{ - cmp::{max, min}, + cmp::{max, min, Ordering}, collections::HashMap, + sync::Arc, }; use common_daft_config::DaftExecutionConfig; use common_error::DaftResult; - use common_file_formats::FileFormat; use daft_core::prelude::*; -use daft_dsl::{col, ApproxPercentileParams, SketchType}; -use daft_dsl::{is_partition_compatible, ExprRef}; - +use daft_dsl::{col, is_partition_compatible, ApproxPercentileParams, ExprRef, SketchType}; use daft_scan::PhysicalScanInfo; -use crate::logical_ops::{ - ActorPoolProject as LogicalActorPoolProject, Aggregate as LogicalAggregate, - Distinct as LogicalDistinct, Explode as LogicalExplode, Filter as LogicalFilter, - Join as LogicalJoin, Limit as LogicalLimit, - MonotonicallyIncreasingId as LogicalMonotonicallyIncreasingId, Pivot as LogicalPivot, - Project as LogicalProject, Repartition as LogicalRepartition, Sample as LogicalSample, - Sink as LogicalSink, Sort as LogicalSort, Source, Unpivot as LogicalUnpivot, -}; -use crate::logical_plan::LogicalPlan; -use crate::partitioning::{ - ClusteringSpec, HashClusteringConfig, RangeClusteringConfig, UnknownClusteringConfig, +use crate::{ + logical_ops::{ + ActorPoolProject as LogicalActorPoolProject, Aggregate as LogicalAggregate, + Distinct as LogicalDistinct, Explode as LogicalExplode, Filter as LogicalFilter, + Join as LogicalJoin, Limit as LogicalLimit, + MonotonicallyIncreasingId as LogicalMonotonicallyIncreasingId, Pivot as LogicalPivot, + Project as LogicalProject, Repartition as LogicalRepartition, Sample as LogicalSample, + Sink as LogicalSink, Sort as LogicalSort, Source, Unpivot as LogicalUnpivot, + }, + logical_plan::LogicalPlan, + partitioning::{ + ClusteringSpec, HashClusteringConfig, RangeClusteringConfig, UnknownClusteringConfig, + }, + physical_ops::*, + physical_plan::{PhysicalPlan, PhysicalPlanRef}, + sink_info::{OutputFileInfo, SinkInfo}, + source_info::{PlaceHolderInfo, SourceInfo}, }; -use crate::physical_ops::*; -use crate::physical_plan::{PhysicalPlan, PhysicalPlanRef}; -use crate::sink_info::{OutputFileInfo, SinkInfo}; -use crate::source_info::{PlaceHolderInfo, SourceInfo}; pub(super) fn translate_single_logical_node( logical_plan: &LogicalPlan, @@ -959,19 +957,20 @@ pub fn populate_aggregation_stages( #[cfg(test)] mod tests { + use std::{assert_matches::assert_matches, sync::Arc}; + use common_daft_config::DaftExecutionConfig; use common_error::DaftResult; use daft_core::prelude::*; use daft_dsl::{col, lit}; - use std::assert_matches::assert_matches; - use std::sync::Arc; - - use crate::physical_plan::PhysicalPlan; - use crate::physical_planner::logical_to_physical; - use crate::test::{dummy_scan_node, dummy_scan_operator}; - use crate::{LogicalPlanBuilder, PhysicalPlanRef}; use super::HashJoin; + use crate::{ + physical_plan::PhysicalPlan, + physical_planner::logical_to_physical, + test::{dummy_scan_node, dummy_scan_operator}, + LogicalPlanBuilder, PhysicalPlanRef, + }; /// Tests that planner drops a simple Repartition (e.g. df.into_partitions()) the child already has the desired number of partitions. /// diff --git a/src/daft-plan/src/sink_info.rs b/src/daft-plan/src/sink_info.rs index 0fd7da6f32..e2c4c374e4 100644 --- a/src/daft-plan/src/sink_info.rs +++ b/src/daft-plan/src/sink_info.rs @@ -2,17 +2,14 @@ use std::hash::Hash; use common_file_formats::FileFormat; use common_io_config::IOConfig; +#[cfg(feature = "python")] +use common_py_serde::{deserialize_py_object, serialize_py_object}; use daft_dsl::ExprRef; use itertools::Itertools; - #[cfg(feature = "python")] use pyo3::PyObject; - use serde::{Deserialize, Serialize}; -#[cfg(feature = "python")] -use common_py_serde::{deserialize_py_object, serialize_py_object}; - #[allow(clippy::large_enum_variant)] #[derive(Debug, PartialEq, Eq, Hash)] pub enum SinkInfo { diff --git a/src/daft-plan/src/source_info/mod.rs b/src/daft-plan/src/source_info/mod.rs index 8ee8aef5ca..ef5582ca3a 100644 --- a/src/daft-plan/src/source_info/mod.rs +++ b/src/daft-plan/src/source_info/mod.rs @@ -1,20 +1,21 @@ pub mod file_info; +use std::{ + hash::{Hash, Hasher}, + sync::atomic::AtomicUsize, +}; + use daft_scan::PhysicalScanInfo; use daft_schema::schema::SchemaRef; pub use file_info::{FileInfo, FileInfos}; use serde::{Deserialize, Serialize}; -use std::hash::Hash; -use std::sync::atomic::AtomicUsize; - -use crate::partitioning::ClusteringSpecRef; -use std::hash::Hasher; - #[cfg(feature = "python")] use { common_py_serde::{deserialize_py_object, serialize_py_object}, pyo3::PyObject, }; +use crate::partitioning::ClusteringSpecRef; + #[derive(Debug, PartialEq, Eq, Hash)] pub enum SourceInfo { InMemory(InMemoryInfo), diff --git a/src/daft-plan/src/test/mod.rs b/src/daft-plan/src/test/mod.rs index 25712e7071..10d7b422d9 100644 --- a/src/daft-plan/src/test/mod.rs +++ b/src/daft-plan/src/test/mod.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use common_file_formats::FileFormatConfig; use daft_scan::{ - storage_config::NativeStorageConfig, storage_config::StorageConfig, AnonymousScanOperator, - Pushdowns, ScanOperator, + storage_config::{NativeStorageConfig, StorageConfig}, + AnonymousScanOperator, Pushdowns, ScanOperator, }; use daft_schema::{field::Field, schema::Schema}; diff --git a/src/daft-plan/src/treenode.rs b/src/daft-plan/src/treenode.rs index db82778bb8..7c4e42a6da 100644 --- a/src/daft-plan/src/treenode.rs +++ b/src/daft-plan/src/treenode.rs @@ -1,9 +1,10 @@ use std::sync::Arc; -use crate::{physical_plan::PhysicalPlan, LogicalPlan}; use common_error::DaftResult; use common_treenode::DynTreeNode; +use crate::{physical_plan::PhysicalPlan, LogicalPlan}; + impl DynTreeNode for LogicalPlan { fn arc_children(&self) -> Vec> { self.children() diff --git a/src/daft-scan/src/python.rs b/src/daft-scan/src/python.rs index 99c7e5b04c..af5b23a1db 100644 --- a/src/daft-scan/src/python.rs +++ b/src/daft-scan/src/python.rs @@ -1,8 +1,7 @@ +use common_py_serde::{deserialize_py_object, serialize_py_object}; use pyo3::{prelude::*, types::PyTuple}; use serde::{Deserialize, Serialize}; -use common_py_serde::{deserialize_py_object, serialize_py_object}; - #[derive(Debug, Clone, Serialize, Deserialize)] struct PyObjectSerializableWrapper( #[serde( @@ -39,44 +38,33 @@ impl PartialEq for PythonTablesFactoryArgs { } pub mod pylib { + use std::sync::Arc; + + use common_daft_config::PyDaftExecutionConfig; use common_error::DaftResult; - use common_file_formats::python::PyFileFormatConfig; - use common_file_formats::FileFormatConfig; + use common_file_formats::{python::PyFileFormatConfig, FileFormatConfig}; use common_py_serde::impl_bincode_py_state_serialization; use daft_dsl::python::PyExpr; - use daft_schema::python::field::PyField; - use daft_schema::schema::SchemaRef; - - use daft_stats::PartitionSpec; - use daft_stats::TableMetadata; - use daft_stats::TableStatistics; - use daft_table::python::PyTable; - use daft_table::Table; - use pyo3::prelude::*; - - use pyo3::types::PyIterator; - use pyo3::types::PyList; - use std::sync::Arc; - - use daft_schema::python::schema::PySchema; - - use pyo3::pyclass; + use daft_schema::{ + python::{field::PyField, schema::PySchema}, + schema::SchemaRef, + }; + use daft_stats::{PartitionSpec, TableMetadata, TableStatistics}; + use daft_table::{python::PyTable, Table}; + use pyo3::{ + prelude::*, + pyclass, + types::{PyIterator, PyList}, + }; use serde::{Deserialize, Serialize}; - use crate::anonymous::AnonymousScanOperator; - use crate::storage_config::PythonStorageConfig; - use crate::DataSource; - use crate::PartitionField; - use crate::Pushdowns; - use crate::ScanOperator; - use crate::ScanOperatorRef; - use crate::ScanTask; - - use crate::glob::GlobScanOperator; - use crate::storage_config::PyStorageConfig; - use common_daft_config::PyDaftExecutionConfig; - use super::PythonTablesFactoryArgs; + use crate::{ + anonymous::AnonymousScanOperator, + glob::GlobScanOperator, + storage_config::{PyStorageConfig, PythonStorageConfig}, + DataSource, PartitionField, Pushdowns, ScanOperator, ScanOperatorRef, ScanTask, + }; #[pyclass(module = "daft.daft", frozen)] #[derive(Debug, Clone)] pub struct ScanOperatorHandle { diff --git a/src/daft-scan/src/storage_config.rs b/src/daft-scan/src/storage_config.rs index 333a040005..ced4c95315 100644 --- a/src/daft-scan/src/storage_config.rs +++ b/src/daft-scan/src/storage_config.rs @@ -5,7 +5,6 @@ use common_io_config::IOConfig; use common_py_serde::impl_bincode_py_state_serialization; use daft_io::{get_io_client, get_runtime, IOClient, RuntimeRef}; use serde::{Deserialize, Serialize}; - #[cfg(feature = "python")] use { common_io_config::python, diff --git a/src/daft-scheduler/src/adaptive.rs b/src/daft-scheduler/src/adaptive.rs index f9ea81c4b2..0e4a2bbc77 100644 --- a/src/daft-scheduler/src/adaptive.rs +++ b/src/daft-scheduler/src/adaptive.rs @@ -2,16 +2,13 @@ use std::sync::Arc; use common_daft_config::DaftExecutionConfig; use daft_core::prelude::Schema; - -use crate::PhysicalPlanScheduler; -use daft_plan::InMemoryInfo; -use daft_plan::LogicalPlan; -use daft_plan::{AdaptivePlanner, MaterializedResults}; - +use daft_plan::{AdaptivePlanner, InMemoryInfo, LogicalPlan, MaterializedResults}; #[cfg(feature = "python")] use { common_daft_config::PyDaftExecutionConfig, daft_plan::PyLogicalPlanBuilder, pyo3::prelude::*, }; + +use crate::PhysicalPlanScheduler; /// A work scheduler for physical plans. #[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] pub struct AdaptivePhysicalPlanScheduler { diff --git a/src/daft-scheduler/src/lib.rs b/src/daft-scheduler/src/lib.rs index f5d12b1a54..9f6de83d71 100644 --- a/src/daft-scheduler/src/lib.rs +++ b/src/daft-scheduler/src/lib.rs @@ -2,10 +2,9 @@ mod adaptive; mod scheduler; pub use adaptive::AdaptivePhysicalPlanScheduler; -pub use scheduler::PhysicalPlanScheduler; - #[cfg(feature = "python")] use pyo3::prelude::*; +pub use scheduler::PhysicalPlanScheduler; #[cfg(feature = "python")] pub fn register_modules(parent: &Bound) -> PyResult<()> { diff --git a/src/daft-scheduler/src/scheduler.rs b/src/daft-scheduler/src/scheduler.rs index 9dfbea2e39..f215c8b7cb 100644 --- a/src/daft-scheduler/src/scheduler.rs +++ b/src/daft-scheduler/src/scheduler.rs @@ -1,11 +1,17 @@ +use std::sync::Arc; + use common_display::mermaid::MermaidDisplayOptions; use common_error::DaftResult; use common_file_formats::FileFormat; use common_py_serde::impl_bincode_py_state_serialization; -use daft_plan::{logical_to_physical, PhysicalPlan, PhysicalPlanRef, QueryStageOutput}; - +use daft_dsl::ExprRef; +use daft_plan::{ + logical_to_physical, physical_ops::*, InMemoryInfo, PhysicalPlan, PhysicalPlanRef, + QueryStageOutput, +}; +#[cfg(feature = "python")] +use daft_plan::{DeltaLakeCatalogInfo, IcebergCatalogInfo, LanceCatalogInfo}; use serde::{Deserialize, Serialize}; - #[cfg(feature = "python")] use { common_daft_config::PyDaftExecutionConfig, @@ -20,15 +26,6 @@ use { std::collections::HashMap, }; -use daft_dsl::ExprRef; -use daft_plan::InMemoryInfo; -use std::sync::Arc; - -use daft_plan::physical_ops::*; - -#[cfg(feature = "python")] -use daft_plan::{DeltaLakeCatalogInfo, IcebergCatalogInfo, LanceCatalogInfo}; - /// A work scheduler for physical plans. #[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] #[derive(Debug, Serialize, Deserialize)] diff --git a/src/daft-schema/src/dtype.rs b/src/daft-schema/src/dtype.rs index 2ed9a2c3d7..449302c185 100644 --- a/src/daft-schema/src/dtype.rs +++ b/src/daft-schema/src/dtype.rs @@ -1,14 +1,12 @@ use std::fmt::Write; use arrow2::datatypes::DataType as ArrowType; +use common_error::{DaftError, DaftResult}; use derive_more::Display; +use serde::{Deserialize, Serialize}; use crate::{field::Field, image_mode::ImageMode, time_unit::TimeUnit}; -use common_error::{DaftError, DaftResult}; - -use serde::{Deserialize, Serialize}; - #[derive(Clone, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] pub enum DataType { // ArrowTypes: diff --git a/src/daft-schema/src/field.rs b/src/daft-schema/src/field.rs index 2ae1fbf3b0..774545fee4 100644 --- a/src/daft-schema/src/field.rs +++ b/src/daft-schema/src/field.rs @@ -1,14 +1,12 @@ -use std::hash::Hash; -use std::sync::Arc; +use std::{hash::Hash, sync::Arc}; use arrow2::datatypes::Field as ArrowField; - -use crate::dtype::DataType; use common_error::{DaftError, DaftResult}; use derive_more::Display; - use serde::{Deserialize, Serialize}; +use crate::dtype::DataType; + pub type Metadata = std::collections::BTreeMap; #[derive(Clone, Display, Debug, Eq, Deserialize, Serialize)] diff --git a/src/daft-schema/src/image_format.rs b/src/daft-schema/src/image_format.rs index a3951eaa02..f7f41a516f 100644 --- a/src/daft-schema/src/image_format.rs +++ b/src/daft-schema/src/image_format.rs @@ -1,12 +1,11 @@ -use derive_more::Display; use std::str::FromStr; +use common_error::{DaftError, DaftResult}; +use derive_more::Display; #[cfg(feature = "python")] use pyo3::{exceptions::PyValueError, prelude::*}; use serde::{Deserialize, Serialize}; -use common_error::{DaftError, DaftResult}; - /// Supported image formats for Daft's I/O layer. #[allow(clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] diff --git a/src/daft-schema/src/image_mode.rs b/src/daft-schema/src/image_mode.rs index 8bc3a481b5..be2eebd8d3 100644 --- a/src/daft-schema/src/image_mode.rs +++ b/src/daft-schema/src/image_mode.rs @@ -1,13 +1,13 @@ -use crate::dtype::DataType; +use std::str::FromStr; + +use common_error::{DaftError, DaftResult}; +use derive_more::Display; use num_derive::FromPrimitive; #[cfg(feature = "python")] use pyo3::{exceptions::PyValueError, prelude::*}; use serde::{Deserialize, Serialize}; -use std::str::FromStr; - -use derive_more::Display; -use common_error::{DaftError, DaftResult}; +use crate::dtype::DataType; /// Supported image modes for Daft's image type. /// diff --git a/src/daft-schema/src/prelude.rs b/src/daft-schema/src/prelude.rs index 442ef37f62..96d3a57709 100644 --- a/src/daft-schema/src/prelude.rs +++ b/src/daft-schema/src/prelude.rs @@ -1,11 +1,8 @@ -pub use crate::dtype::DataType; - -pub use crate::field::{Field, FieldID, FieldRef}; - -pub use crate::image_format::ImageFormat; - -pub use crate::image_mode::ImageMode; - -pub use crate::schema::{Schema, SchemaRef}; - -pub use crate::time_unit::{infer_timeunit_from_format_string, TimeUnit}; +pub use crate::{ + dtype::DataType, + field::{Field, FieldID, FieldRef}, + image_format::ImageFormat, + image_mode::ImageMode, + schema::{Schema, SchemaRef}, + time_unit::{infer_timeunit_from_format_string, TimeUnit}, +}; diff --git a/src/daft-schema/src/python/datatype.rs b/src/daft-schema/src/python/datatype.rs index 3baf3d9625..39f65d314f 100644 --- a/src/daft-schema/src/python/datatype.rs +++ b/src/daft-schema/src/python/datatype.rs @@ -1,15 +1,10 @@ -use crate::dtype::DataType; -use crate::field::Field; -use crate::image_mode::ImageMode; - use common_arrow_ffi as ffi; - use common_py_serde::impl_bincode_py_state_serialization; use indexmap::IndexMap; use pyo3::{class::basic::CompareOp, exceptions::PyValueError, prelude::*}; use serde::{Deserialize, Serialize}; -use crate::time_unit::TimeUnit; +use crate::{dtype::DataType, field::Field, image_mode::ImageMode, time_unit::TimeUnit}; #[pyclass] #[derive(Clone)] @@ -59,9 +54,10 @@ impl PyTimeUnit { } } pub fn __hash__(&self) -> u64 { - use std::collections::hash_map::DefaultHasher; - use std::hash::Hash; - use std::hash::Hasher; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + }; let mut hasher = DefaultHasher::new(); self.timeunit.hash(&mut hasher); hasher.finish() @@ -390,9 +386,10 @@ impl PyDataType { } pub fn __hash__(&self) -> u64 { - use std::collections::hash_map::DefaultHasher; - use std::hash::Hash; - use std::hash::Hasher; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + }; let mut hasher = DefaultHasher::new(); self.dtype.hash(&mut hasher); hasher.finish() diff --git a/src/daft-schema/src/python/field.rs b/src/daft-schema/src/python/field.rs index 214a762dec..2e39915843 100644 --- a/src/daft-schema/src/python/field.rs +++ b/src/daft-schema/src/python/field.rs @@ -1,9 +1,9 @@ +use common_py_serde::impl_bincode_py_state_serialization; use pyo3::prelude::*; use serde::{Deserialize, Serialize}; use super::datatype::PyDataType; use crate::field::Field; -use common_py_serde::impl_bincode_py_state_serialization; #[pyclass(module = "daft.daft")] #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/src/daft-schema/src/python/mod.rs b/src/daft-schema/src/python/mod.rs index 2f9b7dabd9..555e74d0c5 100644 --- a/src/daft-schema/src/python/mod.rs +++ b/src/daft-schema/src/python/mod.rs @@ -3,10 +3,9 @@ pub mod datatype; pub mod field; pub mod schema; -use crate::image_format::ImageFormat; -use crate::image_mode::ImageMode; -pub use datatype::PyDataType; -pub use datatype::PyTimeUnit; +pub use datatype::{PyDataType, PyTimeUnit}; + +use crate::{image_format::ImageFormat, image_mode::ImageMode}; pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; diff --git a/src/daft-schema/src/python/schema.rs b/src/daft-schema/src/python/schema.rs index e65bfe00c3..2aa39c9abc 100644 --- a/src/daft-schema/src/python/schema.rs +++ b/src/daft-schema/src/python/schema.rs @@ -1,14 +1,11 @@ use std::sync::Arc; +use common_py_serde::impl_bincode_py_state_serialization; use pyo3::prelude::*; - use serde::{Deserialize, Serialize}; -use super::datatype::PyDataType; -use super::field::PyField; -use crate::field::Field; -use crate::schema; -use common_py_serde::impl_bincode_py_state_serialization; +use super::{datatype::PyDataType, field::PyField}; +use crate::{field::Field, schema}; #[pyclass(module = "daft.daft")] #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/src/daft-schema/src/schema.rs b/src/daft-schema/src/schema.rs index 7a1cb5144c..7b0328be15 100644 --- a/src/daft-schema/src/schema.rs +++ b/src/daft-schema/src/schema.rs @@ -8,14 +8,13 @@ use common_display::{ table_display::{make_comfy_table, make_schema_vertical_table}, DisplayAs, }; +use common_error::{DaftError, DaftResult}; use derive_more::Display; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; use crate::field::Field; -use common_error::{DaftError, DaftResult}; - pub type SchemaRef = Arc; #[derive(Debug, Display, PartialEq, Eq, Serialize, Deserialize)] diff --git a/src/daft-schema/src/time_unit.rs b/src/daft-schema/src/time_unit.rs index dc98a825e5..50cdcb1e57 100644 --- a/src/daft-schema/src/time_unit.rs +++ b/src/daft-schema/src/time_unit.rs @@ -1,7 +1,5 @@ -use derive_more::Display; - use arrow2::datatypes::TimeUnit as ArrowTimeUnit; - +use derive_more::Display; use serde::{Deserialize, Serialize}; #[derive( diff --git a/src/daft-sketch/src/arrow2_serde.rs b/src/daft-sketch/src/arrow2_serde.rs index d0681002bc..cb36653e36 100644 --- a/src/daft-sketch/src/arrow2_serde.rs +++ b/src/daft-sketch/src/arrow2_serde.rs @@ -69,10 +69,11 @@ pub fn from_arrow2( #[cfg(test)] mod tests { - use crate::{from_arrow2, into_arrow2}; use common_error::DaftResult; use sketches_ddsketch::{Config, DDSketch}; + use crate::{from_arrow2, into_arrow2}; + #[test] fn test_roundtrip_single() -> DaftResult<()> { let mut sketch = DDSketch::new(Config::default()); diff --git a/src/daft-sql/src/lib.rs b/src/daft-sql/src/lib.rs index 12009e82ba..40ae6c57df 100644 --- a/src/daft-sql/src/lib.rs +++ b/src/daft-sql/src/lib.rs @@ -22,9 +22,6 @@ pub fn register_modules(parent: &Bound) -> PyResult<()> { mod tests { use std::sync::Arc; - use crate::planner::SQLPlanner; - - use super::*; use catalog::SQLCatalog; use daft_core::prelude::*; use daft_dsl::{col, lit}; @@ -35,6 +32,9 @@ mod tests { use error::SQLPlannerResult; use rstest::{fixture, rstest}; + use super::*; + use crate::planner::SQLPlanner; + #[fixture] fn tbl_1() -> LogicalPlanRef { let schema = Arc::new( diff --git a/src/daft-sql/src/modules/aggs.rs b/src/daft-sql/src/modules/aggs.rs index f2b70d68bf..74ee294fbc 100644 --- a/src/daft-sql/src/modules/aggs.rs +++ b/src/daft-sql/src/modules/aggs.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use daft_dsl::{col, AggExpr, Expr, ExprRef, LiteralValue}; use sqlparser::ast::{FunctionArg, FunctionArgExpr}; +use super::SQLModule; use crate::{ ensure, error::SQLPlannerResult, @@ -11,8 +12,6 @@ use crate::{ unsupported_sql_err, }; -use super::SQLModule; - pub struct SQLModuleAggs; impl SQLModule for SQLModuleAggs { diff --git a/src/daft-sql/src/modules/float.rs b/src/daft-sql/src/modules/float.rs index 3b1132ffe4..4cfffe34b4 100644 --- a/src/daft-sql/src/modules/float.rs +++ b/src/daft-sql/src/modules/float.rs @@ -1,10 +1,13 @@ -use super::SQLModule; -use crate::functions::SQLFunctions; use daft_dsl::ExprRef; use daft_functions::float; use sqlparser::ast::FunctionArg; -use crate::{error::SQLPlannerResult, functions::SQLFunction, unsupported_sql_err}; +use super::SQLModule; +use crate::{ + error::SQLPlannerResult, + functions::{SQLFunction, SQLFunctions}, + unsupported_sql_err, +}; pub struct SQLModuleFloat; diff --git a/src/daft-sql/src/modules/image/decode.rs b/src/daft-sql/src/modules/image/decode.rs index 78ca4d12af..a6b95d538d 100644 --- a/src/daft-sql/src/modules/image/decode.rs +++ b/src/daft-sql/src/modules/image/decode.rs @@ -1,4 +1,5 @@ use daft_dsl::{Expr, ExprRef, LiteralValue}; +use daft_functions::image::decode::{decode, ImageDecode}; use sqlparser::ast::FunctionArg; use crate::{ @@ -6,7 +7,6 @@ use crate::{ functions::{SQLFunction, SQLFunctionArguments}, unsupported_sql_err, }; -use daft_functions::image::decode::{decode, ImageDecode}; pub struct SQLImageDecode; diff --git a/src/daft-sql/src/modules/image/encode.rs b/src/daft-sql/src/modules/image/encode.rs index 445da73dd4..a902179f88 100644 --- a/src/daft-sql/src/modules/image/encode.rs +++ b/src/daft-sql/src/modules/image/encode.rs @@ -1,12 +1,12 @@ use common_error::DaftError; use daft_dsl::{Expr, ExprRef, LiteralValue}; +use daft_functions::image::encode::{encode, ImageEncode}; use crate::{ error::{PlannerError, SQLPlannerResult}, functions::{SQLFunction, SQLFunctionArguments}, unsupported_sql_err, }; -use daft_functions::image::encode::{encode, ImageEncode}; pub struct SQLImageEncode; diff --git a/src/daft-sql/src/modules/image/mod.rs b/src/daft-sql/src/modules/image/mod.rs index 4554b343c3..89a0527ef0 100644 --- a/src/daft-sql/src/modules/image/mod.rs +++ b/src/daft-sql/src/modules/image/mod.rs @@ -1,6 +1,5 @@ -use crate::functions::SQLFunctions; - use super::SQLModule; +use crate::functions::SQLFunctions; pub mod crop; pub mod decode; pub mod encode; diff --git a/src/daft-sql/src/modules/image/resize.rs b/src/daft-sql/src/modules/image/resize.rs index 6e9b1a8ead..8ce37eb7f8 100644 --- a/src/daft-sql/src/modules/image/resize.rs +++ b/src/daft-sql/src/modules/image/resize.rs @@ -1,4 +1,5 @@ use daft_dsl::{Expr, ExprRef, LiteralValue}; +use daft_functions::image::resize::{resize, ImageResize}; use crate::{ ensure, @@ -6,7 +7,6 @@ use crate::{ functions::{SQLFunction, SQLFunctionArguments}, unsupported_sql_err, }; -use daft_functions::image::resize::{resize, ImageResize}; pub struct SQLImageResize; diff --git a/src/daft-sql/src/modules/image/to_mode.rs b/src/daft-sql/src/modules/image/to_mode.rs index 11c1226ede..a02efb2d36 100644 --- a/src/daft-sql/src/modules/image/to_mode.rs +++ b/src/daft-sql/src/modules/image/to_mode.rs @@ -1,11 +1,11 @@ use daft_dsl::{Expr, ExprRef, LiteralValue}; +use daft_functions::image::to_mode::{image_to_mode, ImageToMode}; use crate::{ error::{PlannerError, SQLPlannerResult}, functions::{SQLFunction, SQLFunctionArguments}, unsupported_sql_err, }; -use daft_functions::image::to_mode::{image_to_mode, ImageToMode}; pub struct SQLImageToMode; diff --git a/src/daft-sql/src/modules/numeric.rs b/src/daft-sql/src/modules/numeric.rs index 06aee7e71e..078878faef 100644 --- a/src/daft-sql/src/modules/numeric.rs +++ b/src/daft-sql/src/modules/numeric.rs @@ -1,3 +1,8 @@ +use daft_dsl::{ + functions::{self, numeric::NumericExpr}, + ExprRef, LiteralValue, +}; + use super::SQLModule; use crate::{ ensure, @@ -5,10 +10,6 @@ use crate::{ functions::{SQLFunction, SQLFunctions}, invalid_operation_err, }; -use daft_dsl::{ - functions::{self, numeric::NumericExpr}, - ExprRef, LiteralValue, -}; pub struct SQLModuleNumeric; diff --git a/src/daft-sql/src/modules/utf8.rs b/src/daft-sql/src/modules/utf8.rs index ac9de3264f..263a8bd9e7 100644 --- a/src/daft-sql/src/modules/utf8.rs +++ b/src/daft-sql/src/modules/utf8.rs @@ -3,13 +3,12 @@ use daft_dsl::{ ExprRef, LiteralValue, }; +use super::SQLModule; use crate::{ ensure, error::SQLPlannerResult, functions::SQLFunction, invalid_operation_err, unsupported_sql_err, }; -use super::SQLModule; - pub struct SQLModuleUtf8; impl SQLModule for SQLModuleUtf8 { diff --git a/src/daft-sql/src/planner.rs b/src/daft-sql/src/planner.rs index 08abedf903..750413e579 100644 --- a/src/daft-sql/src/planner.rs +++ b/src/daft-sql/src/planner.rs @@ -1,11 +1,5 @@ use std::sync::Arc; -use crate::{ - catalog::SQLCatalog, - column_not_found_err, - error::{PlannerError, SQLPlannerResult}, - invalid_operation_err, table_not_found_err, unsupported_sql_err, -}; use daft_core::prelude::*; use daft_dsl::{ col, @@ -16,7 +10,6 @@ use daft_dsl::{ has_agg, lit, literals_to_series, null_lit, Expr, ExprRef, LiteralValue, Operator, }; use daft_plan::{LogicalPlanBuilder, LogicalPlanRef}; - use sqlparser::{ ast::{ ArrayElemTypeDef, BinaryOperator, CastKind, ExactNumberInfo, GroupByExpr, Ident, Query, @@ -27,6 +20,13 @@ use sqlparser::{ parser::{Parser, ParserOptions}, tokenizer::Tokenizer, }; + +use crate::{ + catalog::SQLCatalog, + column_not_found_err, + error::{PlannerError, SQLPlannerResult}, + invalid_operation_err, table_not_found_err, unsupported_sql_err, +}; /// A named logical plan /// This is used to keep track of the table name associated with a logical plan while planning a SQL query #[derive(Debug, Clone)] diff --git a/src/daft-stats/src/column_stats/comparison.rs b/src/daft-stats/src/column_stats/comparison.rs index b3bbbdc513..1d3d923666 100644 --- a/src/daft-stats/src/column_stats/comparison.rs +++ b/src/daft-stats/src/column_stats/comparison.rs @@ -1,10 +1,10 @@ use std::ops::Not; -use crate::DaftCoreComputeSnafu; use daft_core::prelude::*; use snafu::ResultExt; use super::ColumnRangeStatistics; +use crate::DaftCoreComputeSnafu; impl DaftCompare<&ColumnRangeStatistics> for ColumnRangeStatistics { type Output = crate::Result; diff --git a/src/daft-stats/src/column_stats/logical.rs b/src/daft-stats/src/column_stats/logical.rs index c91175ce1a..29b2d47421 100644 --- a/src/daft-stats/src/column_stats/logical.rs +++ b/src/daft-stats/src/column_stats/logical.rs @@ -1,7 +1,6 @@ use snafu::ResultExt; use super::{ColumnRangeStatistics, TruthValue}; - use crate::DaftCoreComputeSnafu; impl std::ops::Not for &ColumnRangeStatistics { diff --git a/src/daft-stats/src/column_stats/mod.rs b/src/daft-stats/src/column_stats/mod.rs index f2f5f507b6..fb78fe3feb 100644 --- a/src/daft-stats/src/column_stats/mod.rs +++ b/src/daft-stats/src/column_stats/mod.rs @@ -249,9 +249,8 @@ mod test { use daft_core::prelude::*; - use crate::column_stats::TruthValue; - use super::ColumnRangeStatistics; + use crate::column_stats::TruthValue; #[test] fn test_equal() -> crate::Result<()> { diff --git a/src/daft-stats/src/table_stats.rs b/src/daft-stats/src/table_stats.rs index 818fc9cdd9..40b2e220c2 100644 --- a/src/daft-stats/src/table_stats.rs +++ b/src/daft-stats/src/table_stats.rs @@ -5,14 +5,13 @@ use std::{ }; use common_error::{DaftError, DaftResult}; +use daft_core::prelude::*; use daft_dsl::{Expr, ExprRef}; use daft_table::Table; use indexmap::{IndexMap, IndexSet}; use crate::column_stats::ColumnRangeStatistics; -use daft_core::prelude::*; - #[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)] pub struct TableStatistics { pub columns: IndexMap, @@ -197,13 +196,11 @@ impl Display for TableStatistics { mod test { use daft_core::prelude::*; - use daft_dsl::{col, lit}; use daft_table::Table; - use crate::column_stats::TruthValue; - use super::TableStatistics; + use crate::column_stats::TruthValue; #[test] fn test_equal() -> crate::Result<()> { diff --git a/src/daft-table/src/ffi.rs b/src/daft-table/src/ffi.rs index 15c2ce96a2..37a118c50e 100644 --- a/src/daft-table/src/ffi.rs +++ b/src/daft-table/src/ffi.rs @@ -1,15 +1,12 @@ -use pyo3::exceptions::PyValueError; - -use pyo3::prelude::*; -use pyo3::types::PyList; - -use crate::Table; use common_error::DaftResult; use daft_core::{ prelude::SchemaRef, series::Series, utils::arrow::{cast_array_for_daft_if_needed, cast_array_from_daft_if_needed}, }; +use pyo3::{exceptions::PyValueError, prelude::*, types::PyList}; + +use crate::Table; pub fn record_batches_to_table( py: Python, diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index d8c4ba4fca..c2a5a622a7 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -2,22 +2,24 @@ #![feature(let_chains)] use core::slice; -use std::collections::{HashMap, HashSet}; -use std::fmt::{Display, Formatter, Result}; +use std::{ + collections::{HashMap, HashSet}, + fmt::{Display, Formatter, Result}, +}; use common_display::table_display::{make_comfy_table, StrValue}; -use daft_core::array::ops::full::FullNull; -use num_traits::ToPrimitive; - -use daft_core::array::ops::{DaftApproxCountDistinctAggable, DaftHllSketchAggable, GroupIndices}; - use common_error::{DaftError, DaftResult}; -use daft_core::prelude::*; - -use daft_dsl::functions::FunctionEvaluator; +use daft_core::{ + array::ops::{ + full::FullNull, DaftApproxCountDistinctAggable, DaftHllSketchAggable, GroupIndices, + }, + prelude::*, +}; use daft_dsl::{ - col, null_lit, AggExpr, ApproxPercentileParams, Expr, ExprRef, LiteralValue, SketchType, + col, functions::FunctionEvaluator, null_lit, AggExpr, ApproxPercentileParams, Expr, ExprRef, + LiteralValue, SketchType, }; +use num_traits::ToPrimitive; #[cfg(feature = "python")] pub mod ffi; mod growable; @@ -26,7 +28,6 @@ mod probe_table; mod repr_html; pub use growable::GrowableTable; - pub use probe_table::{ProbeTable, ProbeTableBuilder}; #[cfg(feature = "python")] @@ -796,12 +797,12 @@ impl<'a> IntoIterator for &'a Table { #[cfg(test)] mod test { - use crate::Table; use common_error::DaftResult; use daft_core::prelude::*; - use daft_dsl::col; + use crate::Table; + #[test] fn add_int_and_float_expression() -> DaftResult<()> { let a = Int64Array::from(("a", vec![1, 2, 3])).into_series(); diff --git a/src/daft-table/src/ops/agg.rs b/src/daft-table/src/ops/agg.rs index 51dd7d2587..33bbf635b6 100644 --- a/src/daft-table/src/ops/agg.rs +++ b/src/daft-table/src/ops/agg.rs @@ -1,13 +1,9 @@ -use daft_core::{array::ops::IntoGroups, prelude::*}; - -use daft_dsl::{functions::FunctionExpr, AggExpr, Expr}; - use common_error::{DaftError, DaftResult}; +use daft_core::{array::ops::IntoGroups, prelude::*}; +use daft_dsl::{functions::FunctionExpr, AggExpr, Expr, ExprRef}; use crate::Table; -use daft_dsl::ExprRef; - impl Table { pub fn agg(&self, to_agg: &[ExprRef], group_by: &[ExprRef]) -> DaftResult { // Dispatch depending on whether we're doing groupby or just a global agg. diff --git a/src/daft-table/src/ops/explode.rs b/src/daft-table/src/ops/explode.rs index 62527fd549..85c694c3c2 100644 --- a/src/daft-table/src/ops/explode.rs +++ b/src/daft-table/src/ops/explode.rs @@ -1,10 +1,9 @@ use common_error::{DaftError, DaftResult}; -use daft_core::count_mode::CountMode; -use daft_core::series::IntoSeries; use daft_core::{ array::ops::as_arrow::AsArrow, + count_mode::CountMode, datatypes::{DataType, UInt64Array}, - series::Series, + series::{IntoSeries, Series}, }; use daft_dsl::Expr; diff --git a/src/daft-table/src/ops/hash.rs b/src/daft-table/src/ops/hash.rs index 7c2a188e95..0abdcb8867 100644 --- a/src/daft-table/src/ops/hash.rs +++ b/src/daft-table/src/ops/hash.rs @@ -5,12 +5,11 @@ use std::{ use common_error::{DaftError, DaftResult}; use daft_core::{ - array::ops::arrow2::comparison::build_multi_array_is_equal, datatypes::UInt64Array, + array::ops::{arrow2::comparison::build_multi_array_is_equal, as_arrow::AsArrow}, + datatypes::UInt64Array, utils::identity_hash_set::IdentityBuildHasher, }; -use daft_core::array::ops::as_arrow::AsArrow; - use crate::Table; pub struct IndexHash { diff --git a/src/daft-table/src/ops/joins/hash_join.rs b/src/daft-table/src/ops/joins/hash_join.rs index ecca1a0f5d..1da3ea5f16 100644 --- a/src/daft-table/src/ops/joins/hash_join.rs +++ b/src/daft-table/src/ops/joins/hash_join.rs @@ -1,20 +1,18 @@ use std::{cmp, iter::repeat}; use arrow2::{bitmap::MutableBitmap, types::IndexRange}; -use daft_core::prelude::*; - +use common_error::DaftResult; +use daft_core::{ + array::ops::{arrow2::comparison::build_multi_array_is_equal, as_arrow::AsArrow}, + prelude::*, +}; use daft_dsl::{ join::{get_common_join_keys, infer_join_schema}, ExprRef, }; -use crate::Table; -use common_error::DaftResult; - -use daft_core::array::ops::as_arrow::AsArrow; - use super::{add_non_join_key_columns, match_types_for_tables}; -use daft_core::array::ops::arrow2::comparison::build_multi_array_is_equal; +use crate::Table; pub(super) fn hash_inner_join( left: &Table, right: &Table, diff --git a/src/daft-table/src/ops/joins/merge_join.rs b/src/daft-table/src/ops/joins/merge_join.rs index 4cef41bbc2..eb57db2d1e 100644 --- a/src/daft-table/src/ops/joins/merge_join.rs +++ b/src/daft-table/src/ops/joins/merge_join.rs @@ -1,5 +1,6 @@ use std::cmp::Ordering; +use common_error::{DaftError, DaftResult}; use daft_core::{ array::ops::full::FullNull, datatypes::{DataType, UInt64Array}, @@ -8,7 +9,6 @@ use daft_core::{ }; use crate::Table; -use common_error::{DaftError, DaftResult}; /// A state machine for the below merge-join algorithm. /// diff --git a/src/daft-table/src/ops/joins/mod.rs b/src/daft-table/src/ops/joins/mod.rs index 39a751beea..2d5e80dae5 100644 --- a/src/daft-table/src/ops/joins/mod.rs +++ b/src/daft-table/src/ops/joins/mod.rs @@ -1,19 +1,15 @@ use std::collections::HashSet; -use daft_core::{prelude::*, utils::supertype::try_get_supertype}; - use common_error::{DaftError, DaftResult}; +use daft_core::{array::growable::make_growable, prelude::*, utils::supertype::try_get_supertype}; use daft_dsl::{ join::{get_common_join_keys, infer_join_schema}, ExprRef, }; use hash_join::hash_semi_anti_join; -use crate::Table; - use self::hash_join::{hash_inner_join, hash_left_right_join, hash_outer_join}; - -use daft_core::array::growable::make_growable; +use crate::Table; mod hash_join; mod merge_join; diff --git a/src/daft-table/src/ops/partition.rs b/src/daft-table/src/ops/partition.rs index e5364bd998..6d07d3f778 100644 --- a/src/daft-table/src/ops/partition.rs +++ b/src/daft-table/src/ops/partition.rs @@ -1,15 +1,15 @@ use std::ops::Rem; use arrow2::array::{Array, DictionaryKey}; -use daft_core::array::ops::IntoGroups; +use common_error::{DaftError, DaftResult}; +use daft_core::{ + array::ops::{as_arrow::AsArrow, IntoGroups}, + datatypes::UInt64Array, + series::IntoSeries, +}; use daft_dsl::ExprRef; use rand::SeedableRng; -use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::UInt64Array, series::IntoSeries}; - -use daft_core::array::ops::as_arrow::AsArrow; - use crate::Table; impl Table { diff --git a/src/daft-table/src/ops/pivot.rs b/src/daft-table/src/ops/pivot.rs index 355b6b5a68..a0b07d20cd 100644 --- a/src/daft-table/src/ops/pivot.rs +++ b/src/daft-table/src/ops/pivot.rs @@ -1,9 +1,9 @@ -use crate::Table; use common_error::{DaftError, DaftResult}; use daft_core::{array::ops::IntoGroups, prelude::*}; - use daft_dsl::ExprRef; +use crate::Table; + fn map_name_to_pivot_key_idx<'a>( pivot_series: &'a Series, pivot_key_indices: &'a [u64], diff --git a/src/daft-table/src/ops/sort.rs b/src/daft-table/src/ops/sort.rs index 39f4e66d04..de082b1970 100644 --- a/src/daft-table/src/ops/sort.rs +++ b/src/daft-table/src/ops/sort.rs @@ -1,8 +1,9 @@ -use crate::Table; use common_error::{DaftError, DaftResult}; use daft_core::series::Series; use daft_dsl::ExprRef; +use crate::Table; + impl Table { pub fn sort(&self, sort_keys: &[ExprRef], descending: &[bool]) -> DaftResult
{ let argsort = self.argsort(sort_keys, descending)?; diff --git a/src/daft-table/src/ops/unpivot.rs b/src/daft-table/src/ops/unpivot.rs index 1487d63c7c..37fba415fc 100644 --- a/src/daft-table/src/ops/unpivot.rs +++ b/src/daft-table/src/ops/unpivot.rs @@ -1,6 +1,5 @@ use common_error::{DaftError, DaftResult}; use daft_core::{prelude::*, series::cast_series_to_supertype}; - use daft_dsl::ExprRef; use crate::Table; diff --git a/src/daft-table/src/probe_table/mod.rs b/src/daft-table/src/probe_table/mod.rs index ff985a1051..2c0e8a88b1 100644 --- a/src/daft-table/src/probe_table/mod.rs +++ b/src/daft-table/src/probe_table/mod.rs @@ -1,11 +1,14 @@ use std::collections::{hash_map::RawEntryMut, HashMap}; use common_error::DaftResult; - -use daft_core::utils::dyn_compare::{build_dyn_multi_array_compare, MultiDynArrayComparator}; -use daft_core::utils::identity_hash_set::IdentityBuildHasher; - -use daft_core::{array::ops::as_arrow::AsArrow, prelude::SchemaRef}; +use daft_core::{ + array::ops::as_arrow::AsArrow, + prelude::SchemaRef, + utils::{ + dyn_compare::{build_dyn_multi_array_compare, MultiDynArrayComparator}, + identity_hash_set::IdentityBuildHasher, + }, +}; use crate::{ops::hash::IndexHash, Table}; diff --git a/src/daft-table/src/python.rs b/src/daft-table/src/python.rs index de1d685a60..728383d23c 100644 --- a/src/daft-table/src/python.rs +++ b/src/daft-table/src/python.rs @@ -1,16 +1,14 @@ -use daft_core::join::JoinType; -use indexmap::IndexMap; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; - -use crate::ffi; -use crate::Table; use common_error::DaftError; -use daft_core::prelude::*; +use daft_core::{ + join::JoinType, + prelude::*, + python::{series::PySeries, PySchema}, +}; use daft_dsl::python::PyExpr; +use indexmap::IndexMap; +use pyo3::{exceptions::PyValueError, prelude::*}; -use daft_core::python::series::PySeries; -use daft_core::python::PySchema; +use crate::{ffi, Table}; #[pyclass] #[derive(Clone)] diff --git a/src/daft-table/src/repr_html.rs b/src/daft-table/src/repr_html.rs index aaabc6efa8..4f81ec11cb 100644 --- a/src/daft-table/src/repr_html.rs +++ b/src/daft-table/src/repr_html.rs @@ -1,5 +1,4 @@ -use daft_core::datatypes::ExtensionArray; -use daft_core::{prelude::DataType, series::Series}; +use daft_core::{datatypes::ExtensionArray, prelude::DataType, series::Series}; pub fn html_value(s: &Series, idx: usize) -> String { match s.data_type() {