Skip to content

Commit

Permalink
[FEAT] Bitwise 'AND' 'OR' 'XOR' Operations (#2365)
Browse files Browse the repository at this point in the history
Hey @colin-ho / @jaychia: when I try to implement Daft logic for
numeric values in src/array/ops/comparison.rs, I get several errors
during the build. I've changed logical_op() in binary_ops.rs to
accommodate numeric types. Please take a look at it and let me know if
I'm doing something wrong.

I've attached the build error I'm facing, for reference:
[build_error.txt](https://github.com/user-attachments/files/15832189/build_error.txt)

---------

Co-authored-by: Colin Ho <[email protected]>
  • Loading branch information
mrutunjay-kinagi and colin-ho authored Jun 26, 2024
1 parent 9b94984 commit 3af6069
Show file tree
Hide file tree
Showing 17 changed files with 349 additions and 47 deletions.
24 changes: 22 additions & 2 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def __rmod__(self, other: Expression) -> Expression:
return Expression._from_pyexpr(expr._expr % self._expr)

def __and__(self, other: Expression) -> Expression:
"""Takes the logical AND of two boolean expressions (``e1 & e2``)"""
"""Takes the logical AND of two boolean expressions, or bitwise AND of two integer expressions (``e1 & e2``)"""
expr = Expression._to_expression(other)
return Expression._from_pyexpr(self._expr & expr._expr)

Expand All @@ -275,10 +275,15 @@ def __rand__(self, other: Expression) -> Expression:
return Expression._from_pyexpr(expr._expr & self._expr)

def __or__(self, other: Expression) -> Expression:
"""Takes the logical OR of two boolean expressions (``e1 | e2``)"""
"""Takes the logical OR of two boolean expressions, or bitwise OR of two integer expressions (``e1 | e2``)"""
expr = Expression._to_expression(other)
return Expression._from_pyexpr(self._expr | expr._expr)

def __xor__(self, other: Expression) -> Expression:
    """Takes the logical XOR of two boolean expressions, or bitwise XOR of two integer expressions (``e1 ^ e2``)"""
    # Coerce the right-hand operand to an Expression before combining.
    expr = Expression._to_expression(other)
    return Expression._from_pyexpr(self._expr ^ expr._expr)

def __ror__(self, other: Expression) -> Expression:
"""Takes the logical reverse OR of two boolean expressions (``e1 | e2``)"""
expr = Expression._to_expression(other)
Expand Down Expand Up @@ -468,6 +473,21 @@ def exp(self) -> Expression:
expr = self._expr.exp()
return Expression._from_pyexpr(expr)

def bitwise_and(self, other: Expression) -> Expression:
    """Computes the bitwise AND of this integer expression with ``other`` (``expr.bitwise_and(other)``)"""
    rhs = Expression._to_expression(other)
    combined = self._expr & rhs._expr
    return Expression._from_pyexpr(combined)

def bitwise_or(self, other: Expression) -> Expression:
    """Computes the bitwise OR of this integer expression with ``other`` (``expr.bitwise_or(other)``)"""
    rhs = Expression._to_expression(other)
    combined = self._expr | rhs._expr
    return Expression._from_pyexpr(combined)

def bitwise_xor(self, other: Expression) -> Expression:
    """Computes the bitwise XOR of this integer expression with ``other`` (``expr.bitwise_xor(other)``)"""
    rhs = Expression._to_expression(other)
    combined = self._expr ^ rhs._expr
    return Expression._from_pyexpr(combined)

def count(self, mode: CountMode = CountMode.Valid) -> Expression:
"""Counts the number of values in the expression.
Expand Down
31 changes: 31 additions & 0 deletions src/daft-core/src/array/ops/bitwise.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use crate::{
array::DataArray,
datatypes::{DaftIntegerType, DaftNumericType},
};

use common_error::DaftResult;

use std::ops::{BitAnd, BitOr, BitXor};

use super::DaftLogical;

/// Bitwise logic for integer-typed [`DataArray`]s.
///
/// Every method forwards both operands to `binary_apply`, supplying the
/// matching `std::ops` bit operator as the combining closure.
impl<T> DaftLogical<&DataArray<T>> for DataArray<T>
where
    T: DaftIntegerType,
    <T as DaftNumericType>::Native:
        Ord + BitAnd<Output = T::Native> + BitOr<Output = T::Native> + BitXor<Output = T::Native>,
{
    type Output = DaftResult<Self>;

    /// Bitwise AND (`&`) of `self` and `rhs`.
    fn and(&self, rhs: &DataArray<T>) -> Self::Output {
        self.binary_apply(rhs, |left, right| left & right)
    }

    /// Bitwise OR (`|`) of `self` and `rhs`.
    fn or(&self, rhs: &DataArray<T>) -> Self::Output {
        self.binary_apply(rhs, |left, right| left | right)
    }

    /// Bitwise XOR (`^`) of `self` and `rhs`.
    fn xor(&self, rhs: &DataArray<T>) -> Self::Output {
        self.binary_apply(rhs, |left, right| left ^ right)
    }
}
1 change: 1 addition & 0 deletions src/daft-core/src/array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod arithmetic;
pub mod arrow2;
pub mod as_arrow;
mod between;
mod bitwise;
pub(crate) mod broadcast;
pub(crate) mod cast;
mod ceil;
Expand Down
28 changes: 19 additions & 9 deletions src/daft-core/src/datatypes/binary_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,28 @@ impl DataType {
use DataType::*;
match (self, other) {
#[cfg(feature = "python")]
(Python, _) | (_, Python) => Ok(()),
(Boolean, Boolean) | (Boolean, Null) | (Null, Boolean) => Ok(()),
_ => Err(()),
}
.map(|()| Boolean)
.map_err(|()| {
DaftError::TypeError(format!(
(Python, _) | (_, Python) => Ok(Boolean),
(Boolean, Boolean) | (Boolean, Null) | (Null, Boolean) => Ok(Boolean),
(s, o) if s.is_integer() && o.is_integer() => {
let dtype = try_numeric_supertype(s, o)?;
if dtype.is_floating() {
Err(DaftError::TypeError(format!(
"Cannot perform logic on types: {}, {}",
self, other
)))
} else {
Ok(dtype)
}
}
(s, o) if (s.is_integer() && o.is_null()) => Ok(s.clone()),
(s, o) if (s.is_null() && o.is_integer()) => Ok(o.clone()),
_ => Err(DaftError::TypeError(format!(
"Cannot perform logic on types: {}, {}",
self, other
))
})
))),
}
}

pub fn comparison_op(
&self,
other: &Self,
Expand Down
6 changes: 3 additions & 3 deletions src/daft-core/src/python/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,15 @@ impl PySeries {
}

pub fn __and__(&self, other: &Self) -> PyResult<Self> {
Ok(self.series.and(&other.series)?.into_series().into())
Ok(self.series.and(&other.series)?.into())
}

pub fn __or__(&self, other: &Self) -> PyResult<Self> {
Ok(self.series.or(&other.series)?.into_series().into())
Ok(self.series.or(&other.series)?.into())
}

pub fn __xor__(&self, other: &Self) -> PyResult<Self> {
Ok(self.series.xor(&other.series)?.into_series().into())
Ok(self.series.xor(&other.series)?.into())
}

pub fn ceil(&self) -> PyResult<Self> {
Expand Down
34 changes: 22 additions & 12 deletions src/daft-core/src/series/array_impl/binary_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{
FixedSizeBinaryArray, Int128Array,
},
series::series_like::SeriesLike,
with_match_comparable_daft_types, with_match_numeric_daft_types, DataType,
with_match_comparable_daft_types, with_match_integer_daft_types, with_match_numeric_daft_types,
DataType,
};

use crate::datatypes::logical::{
Expand Down Expand Up @@ -101,16 +102,25 @@ macro_rules! physical_logic_op {
let output_type = ($self.data_type().logical_op($rhs.data_type()))?;
let lhs = $self.into_series();
use DataType::*;
if let Boolean = output_type {
match (&lhs.data_type(), &$rhs.data_type()) {
match &output_type {
#[cfg(feature = "python")]
Boolean => match (&lhs.data_type(), &$rhs.data_type()) {
#[cfg(feature = "python")]
(Python, _) | (_, Python) => py_binary_op_bool!(lhs, $rhs, $pyop)
.downcast::<BooleanArray>()
.cloned(),
_ => cast_downcast_op!(lhs, $rhs, &Boolean, BooleanArray, $op),
(Python, _) | (_, Python) => Ok(py_binary_op_bool!(lhs, $rhs, $pyop)),
_ => cast_downcast_op_into_series!(lhs, $rhs, &Boolean, BooleanArray, $op),
},
output_type if output_type.is_integer() => {
with_match_integer_daft_types!(output_type, |$T| {
cast_downcast_op_into_series!(
lhs,
$rhs,
output_type,
<$T as DaftDataType>::ArrayType,
$op
)
})
}
} else {
unreachable!()
_ => binary_op_unimplemented!(lhs, $pyop, $rhs, output_type),
}
}};
}
Expand Down Expand Up @@ -180,13 +190,13 @@ pub(crate) trait SeriesBinaryOps: SeriesLike {
fn rem(&self, rhs: &Series) -> DaftResult<Series> {
py_numeric_binary_op!(self, rhs, rem, "mod")
}
fn and(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn and(&self, rhs: &Series) -> DaftResult<Series> {
physical_logic_op!(self, rhs, and, "and_")
}
fn or(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn or(&self, rhs: &Series) -> DaftResult<Series> {
physical_logic_op!(self, rhs, or, "or_")
}
fn xor(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn xor(&self, rhs: &Series) -> DaftResult<Series> {
physical_logic_op!(self, rhs, xor, "xor")
}
fn equal(&self, rhs: &Series) -> DaftResult<BooleanArray> {
Expand Down
6 changes: 3 additions & 3 deletions src/daft-core/src/series/array_impl/data_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,13 @@ macro_rules! impl_series_like_for_data_array {
SeriesBinaryOps::rem(self, rhs)
}

fn and(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn and(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::and(self, rhs)
}
fn or(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn or(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::or(self, rhs)
}
fn xor(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn xor(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::xor(self, rhs)
}

Expand Down
6 changes: 3 additions & 3 deletions src/daft-core/src/series/array_impl/logical_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,13 +195,13 @@ macro_rules! impl_series_like_for_logical_array {
fn rem(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::rem(self, rhs)
}
fn and(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn and(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::and(self, rhs)
}
fn or(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn or(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::or(self, rhs)
}
fn xor(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn xor(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::xor(self, rhs)
}
fn equal(&self, rhs: &Series) -> DaftResult<BooleanArray> {
Expand Down
6 changes: 3 additions & 3 deletions src/daft-core/src/series/array_impl/nested_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,13 @@ macro_rules! impl_series_like_for_nested_arrays {
SeriesBinaryOps::rem(self, rhs)
}

fn and(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn and(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::and(self, rhs)
}
fn or(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn or(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::or(self, rhs)
}
fn xor(&self, rhs: &Series) -> DaftResult<BooleanArray> {
fn xor(&self, rhs: &Series) -> DaftResult<Series> {
SeriesBinaryOps::xor(self, rhs)
}

Expand Down
2 changes: 1 addition & 1 deletion src/daft-core/src/series/ops/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl DaftCompare<&Series> for Series {
}

impl DaftLogical<&Series> for Series {
type Output = DaftResult<BooleanArray>;
type Output = DaftResult<Series>;

call_inner!(and);
call_inner!(or);
Expand Down
6 changes: 3 additions & 3 deletions src/daft-core/src/series/series_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ pub trait SeriesLike: Send + Sync + Any + std::fmt::Debug {
fn mul(&self, rhs: &Series) -> DaftResult<Series>;
fn div(&self, rhs: &Series) -> DaftResult<Series>;
fn rem(&self, rhs: &Series) -> DaftResult<Series>;
fn and(&self, rhs: &Series) -> DaftResult<BooleanArray>;
fn or(&self, rhs: &Series) -> DaftResult<BooleanArray>;
fn xor(&self, rhs: &Series) -> DaftResult<BooleanArray>;
fn and(&self, rhs: &Series) -> DaftResult<Series>;
fn or(&self, rhs: &Series) -> DaftResult<Series>;
fn xor(&self, rhs: &Series) -> DaftResult<Series>;
fn equal(&self, rhs: &Series) -> DaftResult<BooleanArray>;
fn not_equal(&self, rhs: &Series) -> DaftResult<BooleanArray>;
fn lt(&self, rhs: &Series) -> DaftResult<BooleanArray>;
Expand Down
9 changes: 8 additions & 1 deletion src/daft-stats/src/partition_spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ impl PartialEq for PartitionSpec {
// partitioning on columns that may have nulls.
let self_null = self_column.is_null().unwrap();
let other_null = other_column.is_null().unwrap();
if self_null.xor(&other_null).unwrap().get(0).unwrap() {
if self_null
.xor(&other_null)
.unwrap()
.bool()
.unwrap()
.get(0)
.unwrap()
{
return false;
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/daft-table/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,9 @@ impl Table {
NotEq => Ok(lhs.not_equal(&rhs)?.into_series()),
GtEq => Ok(lhs.gte(&rhs)?.into_series()),
Gt => Ok(lhs.gt(&rhs)?.into_series()),
And => Ok(lhs.and(&rhs)?.into_series()),
Or => Ok(lhs.or(&rhs)?.into_series()),
Xor => Ok(lhs.xor(&rhs)?.into_series()),
And => lhs.and(&rhs),
Or => lhs.or(&rhs),
Xor => lhs.xor(&rhs),
_ => panic!("{op:?} not supported"),
}
}
Expand Down
32 changes: 30 additions & 2 deletions tests/expressions/typing/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,20 @@


ALL_TEMPORAL_DTYPES = [
(DataType.date(), pa.array([datetime.date(2021, 1, 1), datetime.date(2021, 1, 2), None], type=pa.date32())),
(
DataType.date(),
pa.array(
[datetime.date(2021, 1, 1), datetime.date(2021, 1, 2), None],
type=pa.date32(),
),
),
*[
(
DataType.timestamp(unit),
pa.array([datetime.datetime(2021, 1, 1), datetime.datetime(2021, 1, 2), None], type=pa.timestamp(unit)),
pa.array(
[datetime.datetime(2021, 1, 1), datetime.datetime(2021, 1, 2), None],
type=pa.timestamp(unit),
),
)
for unit in ["ns", "us", "ms"]
],
Expand Down Expand Up @@ -175,6 +184,25 @@ def is_numeric(dt: DataType) -> bool:
)


def is_integer(dt: DataType) -> bool:
    """Checks if this type is an integer type (signed or unsigned)"""
    return (
        # Signed widths
        dt == DataType.int8()
        or dt == DataType.int16()
        or dt == DataType.int32()
        or dt == DataType.int64()
        # Unsigned widths
        or dt == DataType.uint8()
        or dt == DataType.uint16()
        or dt == DataType.uint32()
        or dt == DataType.uint64()
    )


def is_signed_integer(dt: DataType) -> bool:
    """Checks if this type is one of the signed integer types (int8, int16, int32, int64)"""
    signed_dtypes = (
        DataType.int8(),
        DataType.int16(),
        DataType.int32(),
        DataType.int64(),
    )
    return any(dt == signed for signed in signed_dtypes)


def is_comparable(dt: DataType):
"""Checks if this type is a comparable type"""
return (
Expand Down
17 changes: 15 additions & 2 deletions tests/expressions/typing/test_logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,25 @@

from daft.datatype import DataType
from daft.expressions import col
from tests.expressions.typing.conftest import assert_typing_resolve_vs_runtime_behavior
from tests.expressions.typing.conftest import (
assert_typing_resolve_vs_runtime_behavior,
is_integer,
is_signed_integer,
)


def logical_resolvable(lhs: DataType, rhs: DataType) -> bool:
# Resolvable when both sides are integers (except uint64 paired with a signed integer), when one side is an integer and the other is Null, or when one side is a Bool and the other is a Bool or Null.
return {lhs, rhs} in ({DataType.bool()}, {DataType.bool(), DataType.null()})
if is_integer(lhs) and is_integer(rhs):
if (lhs == DataType.uint64() and is_signed_integer(rhs)) or (
rhs == DataType.uint64() and is_signed_integer(lhs)
):
return False
return True
elif (is_integer(lhs) and rhs == DataType.null()) or (is_integer(rhs) and lhs == DataType.null()):
return True
else:
return {lhs, rhs} in ({DataType.bool()}, {DataType.bool(), DataType.null()})


@pytest.mark.parametrize("op", [ops.and_, ops.or_])
Expand Down
Loading

0 comments on commit 3af6069

Please sign in to comment.