Skip to content

Commit

Permalink
[CHORE]: Move image kernel out of daft-core (#2804)
Browse files Browse the repository at this point in the history
  • Loading branch information
universalmind303 authored Sep 10, 2024
1 parent 2a89d0d commit 3e2d25b
Show file tree
Hide file tree
Showing 40 changed files with 1,577 additions and 1,356 deletions.
19 changes: 17 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ daft-core = {path = "src/daft-core", default-features = false}
daft-csv = {path = "src/daft-csv", default-features = false}
daft-dsl = {path = "src/daft-dsl", default-features = false}
daft-functions = {path = "src/daft-functions", default-features = false}
daft-image = {path = "src/daft-image", default-features = false}
daft-io = {path = "src/daft-io", default-features = false}
daft-json = {path = "src/daft-json", default-features = false}
daft-local-execution = {path = "src/daft-local-execution", default-features = false}
Expand Down Expand Up @@ -41,6 +42,7 @@ python = [
"daft-dsl/python",
"daft-local-execution/python",
"daft-io/python",
"daft-image/python",
"daft-json/python",
"daft-micropartition/python",
"daft-parquet/python",
Expand Down Expand Up @@ -114,6 +116,7 @@ members = [
"src/daft-core",
"src/daft-local-execution",
"src/daft-io",
"src/daft-image",
"src/daft-parquet",
"src/daft-csv",
"src/daft-json",
Expand Down
4 changes: 0 additions & 4 deletions daft/daft.pyi → daft/daft/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1385,10 +1385,6 @@ class PySeries:
def list_slice(self, start: PySeries, end: PySeries | None = None) -> PySeries: ...
def list_sort(self, desc: PySeries) -> PySeries: ...
def map_get(self, key: PySeries) -> PySeries: ...
def image_decode(self, raise_error_on_failure: bool, mode: ImageMode | None = None) -> PySeries: ...
def image_encode(self, image_format: ImageFormat) -> PySeries: ...
def image_resize(self, w: int, h: int) -> PySeries: ...
def image_to_mode(self, mode: ImageMode) -> PySeries: ...
def if_else(self, other: PySeries, predicate: PySeries) -> PySeries: ...
def is_null(self) -> PySeries: ...
def not_null(self) -> PySeries: ...
Expand Down
6 changes: 6 additions & 0 deletions daft/daft/image.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from daft.daft import ImageFormat, ImageMode, PySeries

# Type stubs for the module-level image functions. Each one takes the
# PySeries to operate on as its first argument `s` and returns a PySeries.
# NOTE(review): the per-function semantics below are inferred from the names
# and from the call sites in daft/series.py — confirm against the native
# implementation.

# Presumably decodes encoded image bytes in `s`; `mode` optionally selects an ImageMode.
def decode(s: PySeries, raise_error_on_failure: bool, mode: ImageMode | None = None) -> PySeries: ...
# Presumably encodes the images in `s` into the given ImageFormat.
def encode(s: PySeries, image_format: ImageFormat) -> PySeries: ...
# Presumably resizes each image in `s` to width `w` and height `h`.
def resize(s: PySeries, w: int, h: int) -> PySeries: ...
# Presumably converts each image in `s` to the given ImageMode.
def to_mode(s: PySeries, mode: ImageMode) -> PySeries: ...
10 changes: 5 additions & 5 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pyarrow as pa

from daft.arrow_utils import ensure_array, ensure_chunked_array
from daft.daft import CountMode, ImageFormat, ImageMode, PySeries
from daft.daft import CountMode, ImageFormat, ImageMode, PySeries, image
from daft.datatype import DataType
from daft.utils import pyarrow_supports_fixed_shape_tensor

Expand Down Expand Up @@ -994,26 +994,26 @@ def decode(
mode = ImageMode.from_mode_string(mode.upper())
if not isinstance(mode, ImageMode):
raise ValueError(f"mode must be a string or ImageMode variant, but got: {mode}")
return Series._from_pyseries(self._series.image_decode(raise_error_on_failure=raise_on_error, mode=mode))
return Series._from_pyseries(image.decode(self._series, raise_error_on_failure=raise_on_error, mode=mode))

def encode(self, image_format: str | ImageFormat) -> Series:
if isinstance(image_format, str):
image_format = ImageFormat.from_format_string(image_format.upper())
if not isinstance(image_format, ImageFormat):
raise ValueError(f"image_format must be a string or ImageFormat variant, but got: {image_format}")
return Series._from_pyseries(self._series.image_encode(image_format))
return Series._from_pyseries(image.encode(self._series, image_format))

def resize(self, w: int, h: int) -> Series:
if not isinstance(w, int):
raise TypeError(f"expected int for w but got {type(w)}")
if not isinstance(h, int):
raise TypeError(f"expected int for h but got {type(h)}")

return Series._from_pyseries(self._series.image_resize(w, h))
return Series._from_pyseries(image.resize(self._series, w, h))

def to_mode(self, mode: str | ImageMode) -> Series:
if isinstance(mode, str):
mode = ImageMode.from_mode_string(mode.upper())
if not isinstance(mode, ImageMode):
raise ValueError(f"mode must be a string or ImageMode variant, but got: {mode}")
return Series._from_pyseries(self._series.image_to_mode(mode))
return Series._from_pyseries(image.to_mode(self._series, mode))
12 changes: 8 additions & 4 deletions src/common/display/src/table_display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ pub use comfy_table;

const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS";

/// Implemented by column-like values that can produce a `String` for the
/// entry at a given index.
///
/// `make_comfy_table` takes its columns as `&dyn StrValue` trait objects.
pub trait StrValue {
/// Returns the `String` for the entry at position `idx`.
// NOTE(review): out-of-range behaviour is up to each implementor — not specified here.
fn str_value(&self, idx: usize) -> String;
}

/// Implemented by column-like values that can produce a `String` for the
/// entry at a given index, intended for HTML output.
// NOTE(review): presumably the returned string is HTML markup — confirm against implementors.
pub trait HTMLValue {
/// Returns the `String` for the entry at position `idx`.
fn html_value(&self, idx: usize) -> String;
}

// this should be factored out to a common crate
fn create_table_cell(value: &str) -> comfy_table::Cell {
let mut attributes = vec![];
Expand Down Expand Up @@ -43,10 +51,6 @@ pub fn make_schema_vertical_table(
table
}

pub trait StrValue {
fn str_value(&self, idx: usize) -> String;
}

pub fn make_comfy_table<S: AsRef<str>>(
fields: &[S],
columns: Option<&[&dyn StrValue]>,
Expand Down
6 changes: 0 additions & 6 deletions src/daft-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ arrow2 = {workspace = true, features = [
"compute_substring",
"io_ipc"
]}
base64 = "0.22.0"
bincode = {workspace = true}
chrono = {workspace = true}
chrono-tz = {workspace = true}
Expand Down Expand Up @@ -51,11 +50,6 @@ serde_json = {workspace = true}
sketches-ddsketch = {workspace = true}
unicode-normalization = "0.1.23"

[dependencies.image]
default-features = false
features = ["gif", "jpeg", "ico", "png", "tiff", "webp", "bmp", "hdr"]
version = "0.24.7"

[dependencies.numpy]
optional = true
version = "0.19"
Expand Down
166 changes: 166 additions & 0 deletions src/daft-core/src/array/image_array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use std::vec;

use common_error::DaftResult;

use crate::array::prelude::*;
use crate::datatypes::prelude::*;

use crate::series::{IntoSeries, Series};

/// A tuple of four `u32` values.
// NOTE(review): presumably bounding-box coordinates; the meaning and order of
// the four components is not established in this file — confirm with callers.
#[derive(Clone)]
pub struct BBox(pub u32, pub u32, pub u32, pub u32);

impl BBox {
    /// Builds a `BBox` from an arrow2 array holding exactly four `u32` values,
    /// taken in array order.
    ///
    /// # Panics
    /// Panics if `arr` does not have length 4, is not a `UInt32Array`, or
    /// contains a null element.
    pub fn from_u32_arrow_array(arr: &dyn arrow2::array::Array) -> Self {
        assert!(arr.len() == 4);
        let values = arr
            .as_any()
            .downcast_ref::<arrow2::array::UInt32Array>()
            .unwrap();
        // Lazily unwrap each slot; a null slot panics when it is pulled.
        let mut components = values.iter().map(|slot| *slot.unwrap());
        let first = components.next().unwrap();
        let second = components.next().unwrap();
        let third = components.next().unwrap();
        let fourth = components.next().unwrap();
        BBox(first, second, third, fourth)
    }
}

/// Per-image metadata columns that accompany the pixel data when an
/// `ImageArray` is assembled via `ImageArray::from_list_array`.
pub struct ImageArraySidecarData {
/// Values for the `channel` child array.
pub channels: Vec<u16>,
/// Values for the `height` child array.
pub heights: Vec<u32>,
/// Values for the `width` child array.
pub widths: Vec<u32>,
/// Values for the `mode` child array.
pub modes: Vec<u8>,
/// Optional validity bitmap; applied to every sidecar child array and to
/// the enclosing struct array.
pub validity: Option<arrow2::bitmap::Bitmap>,
}

impl ImageArray {
pub const IMAGE_DATA_IDX: usize = 0;
pub const IMAGE_CHANNEL_IDX: usize = 1;
pub const IMAGE_HEIGHT_IDX: usize = 2;
pub const IMAGE_WIDTH_IDX: usize = 3;
pub const IMAGE_MODE_IDX: usize = 4;

pub fn image_mode(&self) -> &Option<ImageMode> {
match self.data_type() {
DataType::Image(mode) => mode,
_ => panic!("Expected dtype to be Image"),
}
}

pub fn data_array(&self) -> &ListArray {
let array = self.physical.children.get(Self::IMAGE_DATA_IDX).unwrap();
array.list().unwrap()
}

pub fn channel_array(&self) -> &arrow2::array::UInt16Array {
let array = self.physical.children.get(Self::IMAGE_CHANNEL_IDX).unwrap();
array.u16().unwrap().as_arrow()
}

pub fn height_array(&self) -> &arrow2::array::UInt32Array {
let array = self.physical.children.get(Self::IMAGE_HEIGHT_IDX).unwrap();
array.u32().unwrap().as_arrow()
}

pub fn width_array(&self) -> &arrow2::array::UInt32Array {
let array = self.physical.children.get(Self::IMAGE_WIDTH_IDX).unwrap();
array.u32().unwrap().as_arrow()
}

pub fn mode_array(&self) -> &arrow2::array::UInt8Array {
let array = self.physical.children.get(Self::IMAGE_MODE_IDX).unwrap();
array.u8().unwrap().as_arrow()
}

pub fn from_list_array(
name: &str,
data_type: DataType,
data_array: ListArray,
sidecar_data: ImageArraySidecarData,
) -> DaftResult<Self> {
let values: Vec<Series> = vec![
data_array.into_series().rename("data"),
UInt16Array::from((
"channel",
Box::new(
arrow2::array::UInt16Array::from_vec(sidecar_data.channels)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
UInt32Array::from((
"height",
Box::new(
arrow2::array::UInt32Array::from_vec(sidecar_data.heights)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
UInt32Array::from((
"width",
Box::new(
arrow2::array::UInt32Array::from_vec(sidecar_data.widths)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
UInt8Array::from((
"mode",
Box::new(
arrow2::array::UInt8Array::from_vec(sidecar_data.modes)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
];
let physical_type = data_type.to_physical();
let struct_array = StructArray::new(
Field::new(name, physical_type),
values,
sidecar_data.validity,
);
Ok(ImageArray::new(Field::new(name, data_type), struct_array))
}

pub fn from_vecs<T: arrow2::types::NativeType>(
name: &str,
data_type: DataType,
data: Vec<T>,
offsets: Vec<i64>,
sidecar_data: ImageArraySidecarData,
) -> DaftResult<Self> {
if data.is_empty() {
return Ok(ImageArray::full_null(name, &data_type, offsets.len() - 1));
}
let offsets = arrow2::offset::OffsetsBuffer::try_from(offsets)?;
let arrow_dtype: arrow2::datatypes::DataType = T::PRIMITIVE.into();
if let DataType::Image(Some(mode)) = &data_type {
if mode.get_dtype().to_arrow()? != arrow_dtype {
panic!("Inner value dtype of provided dtype {data_type:?} is inconsistent with inferred value dtype {arrow_dtype:?}");
}
}
let data_array = ListArray::new(
Field::new("data", DataType::List(Box::new((&arrow_dtype).into()))),
Series::try_from((
"data",
Box::new(arrow2::array::PrimitiveArray::from_vec(data))
as Box<dyn arrow2::array::Array>,
))?,
offsets,
sidecar_data.validity.clone(),
);

Self::from_list_array(name, data_type, data_array, sidecar_data)
}
}

impl FixedShapeImageArray {
    /// Returns the image mode carried by this array's `FixedShapeImage` dtype.
    ///
    /// # Panics
    /// Panics if the array's dtype is not `DataType::FixedShapeImage`.
    pub fn image_mode(&self) -> &ImageMode {
        match self.data_type() {
            DataType::FixedShapeImage(mode, _, _) => mode,
            // Name the dtype this array actually requires in the message.
            other => panic!("Expected dtype to be FixedShapeImage, got {other:?}"),
        }
    }
}
1 change: 1 addition & 0 deletions src/daft-core/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod fixed_size_list_array;
pub mod from;
pub mod growable;
pub mod image_array;
pub mod iterator;
mod list_array;
pub mod ops;
Expand Down
3 changes: 2 additions & 1 deletion src/daft-core/src/array/ops/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use super::as_arrow::AsArrow;
use crate::{
array::{
growable::make_growable,
ops::{from_arrow::FromArrow, full::FullNull, image::ImageArraySidecarData},
image_array::ImageArraySidecarData,
ops::{from_arrow::FromArrow, full::FullNull},
DataArray, FixedSizeListArray, ListArray, StructArray,
},
datatypes::{
Expand Down
Loading

0 comments on commit 3e2d25b

Please sign in to comment.