Skip to content

Commit

Permalink
Support more integer dtypes in Series.from_binary/2 (#790)
Browse files Browse the repository at this point in the history
* Add more signed integer dtypes to "from_binary/2"

* Add support for more unsigned int types in from_binary/2

* Add support for unsigned integers in Series.to_iovec/1

* Fix formatting

* Change aliases for sig ints to `:s` instead of `:i`

* Rename functions to follow our signed int aliases
  • Loading branch information
philss authored Dec 29, 2023
1 parent 5c51c26 commit 64585e4
Show file tree
Hide file tree
Showing 8 changed files with 228 additions and 52 deletions.
17 changes: 11 additions & 6 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,10 @@ defmodule Explorer.PolarsBackend.Native do
def s_from_list_duration(_name, _val, _precision), do: err()
def s_from_list_f32(_name, _val), do: err()
def s_from_list_f64(_name, _val), do: err()
def s_from_list_i8(_name, _val), do: err()
def s_from_list_i16(_name, _val), do: err()
def s_from_list_i32(_name, _val), do: err()
def s_from_list_i64(_name, _val), do: err()
def s_from_list_s8(_name, _val), do: err()
def s_from_list_s16(_name, _val), do: err()
def s_from_list_s32(_name, _val), do: err()
def s_from_list_s64(_name, _val), do: err()
def s_from_list_u8(_name, _val), do: err()
def s_from_list_u16(_name, _val), do: err()
def s_from_list_u32(_name, _val), do: err()
Expand All @@ -359,9 +359,14 @@ defmodule Explorer.PolarsBackend.Native do
def s_from_list_of_series_as_structs(_name, _val), do: err()
def s_from_binary_f32(_name, _val), do: err()
def s_from_binary_f64(_name, _val), do: err()
def s_from_binary_i32(_name, _val), do: err()
def s_from_binary_i64(_name, _val), do: err()
def s_from_binary_s8(_name, _val), do: err()
def s_from_binary_s16(_name, _val), do: err()
def s_from_binary_s32(_name, _val), do: err()
def s_from_binary_s64(_name, _val), do: err()
def s_from_binary_u8(_name, _val), do: err()
def s_from_binary_u16(_name, _val), do: err()
def s_from_binary_u32(_name, _val), do: err()
def s_from_binary_u64(_name, _val), do: err()
def s_not_equal(_s, _rhs), do: err()
def s_or(_s, _s2), do: err()
def s_peak_max(_s), do: err()
Expand Down
47 changes: 34 additions & 13 deletions lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,10 @@ defmodule Explorer.PolarsBackend.Shared do
def from_list(list, dtype, name) when is_list(list) do
case dtype do
# Signed integers
{:s, 8} -> Native.s_from_list_i8(name, list)
{:s, 16} -> Native.s_from_list_i16(name, list)
{:s, 32} -> Native.s_from_list_i32(name, list)
{:s, 64} -> Native.s_from_list_i64(name, list)
{:s, 8} -> Native.s_from_list_s8(name, list)
{:s, 16} -> Native.s_from_list_s16(name, list)
{:s, 32} -> Native.s_from_list_s32(name, list)
{:s, 64} -> Native.s_from_list_s64(name, list)
# Unsigned integers
{:u, 8} -> Native.s_from_list_u8(name, list)
{:u, 16} -> Native.s_from_list_u16(name, list)
Expand All @@ -163,31 +163,52 @@ defmodule Explorer.PolarsBackend.Shared do
Native.s_from_binary_u8(name, binary) |> Native.s_cast(dtype) |> ok()

:date ->
Native.s_from_binary_i32(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s32(name, binary) |> Native.s_cast(dtype) |> ok()

:time ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:datetime, :millisecond} ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:datetime, :microsecond} ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:datetime, :nanosecond} ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:duration, :millisecond} ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:duration, :microsecond} ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:duration, :nanosecond} ->
Native.s_from_binary_i64(name, binary) |> Native.s_cast(dtype) |> ok()
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:s, 8} ->
Native.s_from_binary_s8(name, binary)

{:s, 16} ->
Native.s_from_binary_s16(name, binary)

{:s, 32} ->
Native.s_from_binary_s32(name, binary)

{:s, 64} ->
Native.s_from_binary_i64(name, binary)
Native.s_from_binary_s64(name, binary)

{:u, 8} ->
Native.s_from_binary_u8(name, binary)

{:u, 16} ->
Native.s_from_binary_u16(name, binary)

{:u, 32} ->
Native.s_from_binary_u32(name, binary)

{:u, 64} ->
Native.s_from_binary_u64(name, binary)

{:f, 32} ->
Native.s_from_binary_f32(name, binary)
Expand Down
23 changes: 12 additions & 11 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ defmodule Explorer.Series do
* The atom `:integer` as an alias for `{:s, 64}` to mirror Elixir's integers
* There are several atoms to represent integer dtypes, and they also follow Nx naming for compatibility.
They are the following:
* `i8` as alias to `{:s, 8}`
* `i16` as alias to `{:s, 16}`
* `i32` as alias to `{:s, 32}`
* `i64` as alias to `{:s, 64}`
* `s8` as alias to `{:s, 8}`
* `s16` as alias to `{:s, 16}`
* `s32` as alias to `{:s, 32}`
* `s64` as alias to `{:s, 64}`
* `u8` as alias to `{:u, 8}`
* `u16` as alias to `{:u, 16}`
* `u32` as alias to `{:u, 32}`
Expand Down Expand Up @@ -427,7 +427,7 @@ defmodule Explorer.Series do
boolean [true, false, true]
>
Dates are encoded as i32 representing days from the Unix epoch (1970-01-01):
Dates are encoded as s32 representing days from the Unix epoch (1970-01-01):
iex> binary = <<-719162::signed-32-native, 0::signed-32-native, 6129::signed-32-native>>
iex> Explorer.Series.from_binary(binary, :date)
Expand All @@ -436,7 +436,7 @@ defmodule Explorer.Series do
date [0001-01-01, 1970-01-01, 1986-10-13]
>
Times are encoded as i64 representing nanoseconds from midnight:
Times are encoded as s64 representing nanoseconds from midnight:
iex> binary = <<0::signed-64-native, 86399999999000::signed-64-native>>
iex> Explorer.Series.from_binary(binary, :time)
Expand All @@ -445,7 +445,7 @@ defmodule Explorer.Series do
time [00:00:00.000000, 23:59:59.999999]
>
Datetimes are encoded as i64 representing microseconds from the Unix epoch (1970-01-01):
Datetimes are encoded as s64 representing microseconds from the Unix epoch (1970-01-01):
iex> binary = <<0::signed-64-native, 529550625987654::signed-64-native>>
iex> Explorer.Series.from_binary(binary, {:datetime, :microsecond})
Expand Down Expand Up @@ -478,7 +478,8 @@ defmodule Explorer.Series do
{_type, alignment} = dtype |> Shared.dtype_to_iotype!()

if rem(bit_size(binary), alignment) != 0 do
raise ArgumentError, "binary for dtype #{dtype} is expected to be #{alignment}-bit aligned"
raise ArgumentError,
"binary for dtype #{Shared.dtype_to_string(dtype)} is expected to be #{alignment}-bit aligned"
end

backend = backend_from_options!(opts)
Expand Down Expand Up @@ -709,19 +710,19 @@ defmodule Explorer.Series do
iex> Explorer.Series.to_iovec(series)
[<<1, 0, 1>>]
Dates are encoded as i32 representing days from the Unix epoch (1970-01-01):
Dates are encoded as s32 representing days from the Unix epoch (1970-01-01):
iex> series = Explorer.Series.from_list([~D[0001-01-01], ~D[1970-01-01], ~D[1986-10-13]])
iex> Explorer.Series.to_iovec(series)
[<<-719162::signed-32-native, 0::signed-32-native, 6129::signed-32-native>>]
Times are encoded as i64 representing nanoseconds from midnight:
Times are encoded as s64 representing nanoseconds from midnight:
iex> series = Explorer.Series.from_list([~T[00:00:00.000000], ~T[23:59:59.999999]])
iex> Explorer.Series.to_iovec(series)
[<<0::signed-64-native, 86399999999000::signed-64-native>>]
Datetimes are encoded as i64 representing their precision from the Unix epoch (1970-01-01):
Datetimes are encoded as s64 representing their precision from the Unix epoch (1970-01-01):
iex> series = Explorer.Series.from_list([~N[0001-01-01 00:00:00], ~N[1970-01-01 00:00:00], ~N[1986-10-13 01:23:45.987654]])
iex> Explorer.Series.to_iovec(series)
Expand Down
8 changes: 4 additions & 4 deletions lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ defmodule Explorer.Shared do

def normalise_dtype(dtype) when dtype in @scalar_types, do: dtype
def normalise_dtype(dtype) when dtype in [:float, :f64], do: {:f, 64}
def normalise_dtype(dtype) when dtype in [:integer, :i64], do: {:s, 64}
def normalise_dtype(dtype) when dtype in [:integer, :s64], do: {:s, 64}
def normalise_dtype(:f32), do: {:f, 32}
def normalise_dtype(:i8), do: {:s, 8}
def normalise_dtype(:i16), do: {:s, 16}
def normalise_dtype(:i32), do: {:s, 32}
def normalise_dtype(:s8), do: {:s, 8}
def normalise_dtype(:s16), do: {:s, 16}
def normalise_dtype(:s32), do: {:s, 32}
def normalise_dtype(:u8), do: {:u, 8}
def normalise_dtype(:u16), do: {:u, 16}
def normalise_dtype(:u32), do: {:u, 32}
Expand Down
4 changes: 4 additions & 0 deletions native/explorer/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,10 @@ pub fn iovec_from_series(s: ExSeries, env: Env) -> Result<Term, ExplorerError> {
DataType::Int16 => series_to_iovec!(resource, s, env, i16, i16),
DataType::Int32 => series_to_iovec!(resource, s, env, i32, i32),
DataType::Int64 => series_to_iovec!(resource, s, env, i64, i64),
DataType::UInt8 => series_to_iovec!(resource, s, env, u8, u8),
DataType::UInt16 => series_to_iovec!(resource, s, env, u16, u16),
DataType::UInt32 => series_to_iovec!(resource, s, env, u32, u32),
DataType::UInt64 => series_to_iovec!(resource, s, env, u64, u64),
DataType::Float32 => series_to_iovec!(resource, s, env, f32, f32),
DataType::Float64 => series_to_iovec!(resource, s, env, f64, f64),
DataType::Date => series_to_iovec!(resource, s, env, date, i32),
Expand Down
17 changes: 11 additions & 6 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,10 @@ rustler::init!(
s_from_list_duration,
s_from_list_f32,
s_from_list_f64,
s_from_list_i8,
s_from_list_i16,
s_from_list_i32,
s_from_list_i64,
s_from_list_s8,
s_from_list_s16,
s_from_list_s32,
s_from_list_s64,
s_from_list_u8,
s_from_list_u16,
s_from_list_u32,
Expand All @@ -412,9 +412,14 @@ rustler::init!(
s_from_list_of_series_as_structs,
s_from_binary_f32,
s_from_binary_f64,
s_from_binary_i64,
s_from_binary_i32,
s_from_binary_s8,
s_from_binary_s16,
s_from_binary_s32,
s_from_binary_s64,
s_from_binary_u8,
s_from_binary_u16,
s_from_binary_u32,
s_from_binary_u64,
s_not_equal,
s_or,
s_peak_max,
Expand Down
19 changes: 13 additions & 6 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ macro_rules! from_list {
};
}

from_list!(s_from_list_i8, i8);
from_list!(s_from_list_i16, i16);
from_list!(s_from_list_i32, i32);
from_list!(s_from_list_i64, i64);
from_list!(s_from_list_s8, i8);
from_list!(s_from_list_s16, i16);
from_list!(s_from_list_s32, i32);
from_list!(s_from_list_s64, i64);

from_list!(s_from_list_u8, u8);
from_list!(s_from_list_u16, u16);
Expand Down Expand Up @@ -212,9 +212,16 @@ macro_rules! from_binary {

from_binary!(s_from_binary_f32, f32, 4);
from_binary!(s_from_binary_f64, f64, 8);
from_binary!(s_from_binary_i32, i32, 4);
from_binary!(s_from_binary_i64, i64, 8);

from_binary!(s_from_binary_s8, i8, 1);
from_binary!(s_from_binary_s16, i16, 2);
from_binary!(s_from_binary_s32, i32, 4);
from_binary!(s_from_binary_s64, i64, 8);

from_binary!(s_from_binary_u8, u8, 1);
from_binary!(s_from_binary_u16, u16, 2);
from_binary!(s_from_binary_u32, u32, 4);
from_binary!(s_from_binary_u64, u64, 8);

#[rustler::nif]
pub fn s_name(data: ExSeries) -> Result<String, ExplorerError> {
Expand Down
Loading

0 comments on commit 64585e4

Please sign in to comment.