Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Unit transform #283

Merged
merged 5 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/src/transforms.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ AbsoluteUnits
Unitify
```

## Unit

```@docs
Unit
```

## Map

```@docs
Expand Down
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export
DropUnits,
AbsoluteUnits,
Unitify,
Unit,
Map,
Replace,
Coalesce,
Expand Down
1 change: 1 addition & 0 deletions src/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ include("transforms/dropunits.jl")
include("transforms/dropconstant.jl")
include("transforms/absoluteunits.jl")
include("transforms/unitify.jl")
include("transforms/unit.jl")
include("transforms/map.jl")
include("transforms/replace.jl")
include("transforms/coalesce.jl")
Expand Down
88 changes: 88 additions & 0 deletions src/transforms/unit.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

"""
    Unit(unit)

Converts the units of all columns in the table to `unit`.

    Unit(cols₁ => unit₁, cols₂ => unit₂, ..., colsₙ => unitₙ)

Converts the units of selected columns `cols₁`, `cols₂`, ..., `colsₙ`
to `unit₁`, `unit₂`, ..., `unitₙ`.

The column selection can be a single column identifier (index or name),
a collection of identifiers or a regular expression (regex).

Selected columns whose elements are not quantities are left unchanged.

# Examples

```julia
Unit(u"m")
Unit(1 => u"km", :b => u"K", "c" => u"s")
Unit([2, 3] => u"cm")
Unit([:a, :c] => u"cm")
Unit(["a", "c"] => u"cm")
Unit(r"[abc]" => u"km")
```
"""
struct Unit <: StatelessFeatureTransform
  selectors::Vector{ColumnSelector}
  units::Vector{Units}

  function Unit(selectors, units)
    # selectors and units are consumed pairwise when the transform is
    # applied, so a length mismatch would silently ignore trailing entries
    if length(selectors) != length(units)
      throw(ArgumentError("the number of column selections must match the number of units"))
    end
    # `new` converts both arguments to the declared field types
    new(selectors, units)
  end
end

# a unit (or at least one `cols => unit` pair) is mandatory
function Unit()
  throw(ArgumentError("cannot create Unit transform without arguments"))
end

# a single unit is paired with a selector that picks all columns
function Unit(unit::Units)
  Unit([AllSelector()], [unit])
end

# split `cols => unit` pairs into a vector of selectors and a vector of units
function Unit(pairs::Pair...)
  selectors = collect(selector.(first.(pairs)))
  units = collect(last.(pairs))
  Unit(selectors, units)
end

# the original units are returned as cache by `applyfeat`,
# which is what `revertfeat` needs to undo the conversion
function isrevertible(::Type{<:Unit})
  true
end

# Convert column `x` to unit `u`, returning `(column, original unit)`.
# The method is selected by the element type of `x` with `Missing` removed.
function _uconvert(u, x)
  T = nonmissingtype(eltype(x))
  _uconvert(T, u, x)
end

# fallback: the column is returned as is, with `nothing` as the original unit
_uconvert(::Type, _, x) = (x, nothing)

# quantities: convert every element and report the unit of the element type
function _uconvert(::Type{Q}, u, x) where {Q<:AbstractQuantity}
  converted = map(Base.Fix1(uconvert, u), x)
  (converted, unit(Q))
end

# Convert the selected columns of `feat` to their target units.
# Returns the new table together with one entry per column holding the
# original unit, or `nothing` when the column was left as is.
function applyfeat(transform::Unit, feat, prep)
  cols = Tables.columns(feat)
  names = Tables.columnnames(cols)

  # expand every selector into `column name => target unit` pairs
  name2unit = mapreduce(vcat, transform.selectors, transform.units) do selector, u
    selector(names) .=> u
  end
  targets = Dict(name2unit)

  # (column, original unit) for every column of the table
  results = map(names) do name
    x = Tables.getcolumn(cols, name)
    u = get(targets, name, nothing)
    isnothing(u) ? (x, nothing) : _uconvert(u, x)
  end

  newcols = first.(results)
  ounits = last.(results)

  𝒯 = (; zip(names, newcols)...)
  newfeat = Tables.materializer(feat)(𝒯)
  newfeat, ounits
end

# Convert the columns of `newfeat` back to the units stored in `fcache`.
# Entries equal to `nothing` mark columns that are returned as is.
function revertfeat(::Unit, newfeat, fcache)
  cols = Tables.columns(newfeat)
  names = Tables.columnnames(cols)

  restored = map(names, fcache) do name, ounit
    x = Tables.getcolumn(cols, name)
    if isnothing(ounit)
      x
    else
      map(v -> uconvert(ounit, v), x)
    end
  end

  Tables.materializer(newfeat)((; zip(names, restored)...))
end
15 changes: 15 additions & 0 deletions test/shows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,21 @@
@test iostr == "Unitify transform"
end

@testset "Unit" begin
  T = Unit(:a => u"m", [:b, :c] => u"s")

  # compact mode: single-line representation
  compact = sprint(show, T)
  @test compact == "Unit(selectors: ColumnSelector[:a, [:b, :c]], units: Units[m, s])"

  # full mode: tree-like representation with one field per line
  full = sprint(show, MIME"text/plain"(), T)
  @test full == """
  Unit transform
  ├─ selectors: ColumnSelectors.ColumnSelector[:a, [:b, :c]]
  └─ units: Unitful.Units[m, s]"""
end

@testset "Map" begin
fun = (a, b) -> 2a + b
T = Map(:a => sin, [:a, :b] => fun => :c)
Expand Down
1 change: 1 addition & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ transformfiles = [
"dropconstant.jl",
"absoluteunits.jl",
"unitify.jl",
"unit.jl",
"map.jl",
"replace.jl",
"coalesce.jl",
Expand Down
165 changes: 165 additions & 0 deletions test/transforms/unit.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
@testset "Unit" begin
  @test isrevertible(Unit(u"m"))

  # table mixing length columns with unitless and categorical columns
  a = [2.7, 2.9, 2.2, 1.4, 1.8, 3.3] * u"m"
  b = [300, 500, missing, 800, missing, 400] * u"cm"
  c = [8, 2, 5, 7, 9, 4] * u"km"
  d = [0.3, 0.1, 0.9, 0.2, 0.7, 0.4]
  e = ["no", "no", "yes", "yes", "no", "yes"]
  t = Table(; a, b, c, d, e)

  # single unit applied to all columns
  T = Unit(u"m")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"m"
  @test unit(eltype(tₙ.b)) == u"m"
  @test unit(eltype(tₙ.c)) == u"m"
  @test eltype(tₙ.d) <: Float64
  @test eltype(tₙ.e) <: String
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.a)) == u"m"
  @test unit(eltype(tₒ.b)) == u"cm"
  @test unit(eltype(tₒ.c)) == u"km"
  @test all(isapprox.(tₒ.a, t.a))
  @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
  @test all(isapprox.(tₒ.c, t.c))
  @test tₒ.d == t.d
  @test tₒ.e == t.e

  # table where every column has units
  a = [2.7, 2.9, 2.2, 1.4, 1.8, 3.3] * u"m"
  b = [300, 500, missing, 800, missing, 400] * u"cm"
  c = [8, 2, 5, 7, 9, 4] * u"km"
  d = [29.1, missing, 29.2, missing, 28.4, 26.4] * u"°C"
  e = [0.9, 0.4, 0.5, 0.1, 0.3, 0.6] * u"kg"
  f = 0.5u"ppm" * e
  t = Table(; a, b, c, d, e, f)

  # selection by column index
  T = Unit(4 => u"K")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"m"
  @test unit(eltype(tₙ.b)) == u"cm"
  @test unit(eltype(tₙ.c)) == u"km"
  @test unit(eltype(tₙ.d)) == u"K"
  @test unit(eltype(tₙ.e)) == u"kg"
  @test unit(eltype(tₙ.f)) == u"kg * ppm"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.d)) == u"°C"
  @test tₒ.a == t.a
  @test isequal(tₒ.b, t.b)
  @test tₒ.c == t.c
  @test all(isapprox.(skipmissing(tₒ.d), skipmissing(t.d)))
  @test tₒ.e == t.e
  @test tₒ.f == t.f

  # selection by symbol
  T = Unit(:e => u"g")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"m"
  @test unit(eltype(tₙ.b)) == u"cm"
  @test unit(eltype(tₙ.c)) == u"km"
  @test unit(eltype(tₙ.d)) == u"°C"
  @test unit(eltype(tₙ.e)) == u"g"
  @test unit(eltype(tₙ.f)) == u"kg * ppm"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.e)) == u"kg"
  @test tₒ.a == t.a
  @test isequal(tₒ.b, t.b)
  @test tₒ.c == t.c
  @test isequal(tₒ.d, t.d)
  @test all(isapprox.(tₒ.e, t.e))
  @test tₒ.f == t.f

  # selection by string
  T = Unit("f" => u"kg")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"m"
  @test unit(eltype(tₙ.b)) == u"cm"
  @test unit(eltype(tₙ.c)) == u"km"
  @test unit(eltype(tₙ.d)) == u"°C"
  @test unit(eltype(tₙ.e)) == u"kg"
  @test unit(eltype(tₙ.f)) == u"kg"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.f)) == u"kg * ppm"
  @test tₒ.a == t.a
  @test isequal(tₒ.b, t.b)
  @test tₒ.c == t.c
  @test isequal(tₒ.d, t.d)
  @test tₒ.e == t.e
  @test all(isapprox.(tₒ.f, t.f))

  # selection by vector of indices
  T = Unit([1, 2, 3] => u"m")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"m"
  @test unit(eltype(tₙ.b)) == u"m"
  @test unit(eltype(tₙ.c)) == u"m"
  @test unit(eltype(tₙ.d)) == u"°C"
  @test unit(eltype(tₙ.e)) == u"kg"
  @test unit(eltype(tₙ.f)) == u"kg * ppm"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.a)) == u"m"
  @test unit(eltype(tₒ.b)) == u"cm"
  @test unit(eltype(tₒ.c)) == u"km"
  @test all(isapprox.(tₒ.a, t.a))
  @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
  @test all(isapprox.(tₒ.c, t.c))
  @test isequal(tₒ.d, t.d)
  @test tₒ.e == t.e
  @test tₒ.f == t.f

  # selection by vector of symbols
  T = Unit([:a, :b, :c] => u"cm")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"cm"
  @test unit(eltype(tₙ.b)) == u"cm"
  @test unit(eltype(tₙ.c)) == u"cm"
  @test unit(eltype(tₙ.d)) == u"°C"
  @test unit(eltype(tₙ.e)) == u"kg"
  @test unit(eltype(tₙ.f)) == u"kg * ppm"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.a)) == u"m"
  @test unit(eltype(tₒ.b)) == u"cm"
  @test unit(eltype(tₒ.c)) == u"km"
  @test all(isapprox.(tₒ.a, t.a))
  @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
  @test all(isapprox.(tₒ.c, t.c))
  @test isequal(tₒ.d, t.d)
  @test tₒ.e == t.e
  @test tₒ.f == t.f

  # selection by vector of strings
  T = Unit(["a", "b", "c"] => u"km")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"km"
  @test unit(eltype(tₙ.b)) == u"km"
  @test unit(eltype(tₙ.c)) == u"km"
  @test unit(eltype(tₙ.d)) == u"°C"
  @test unit(eltype(tₙ.e)) == u"kg"
  @test unit(eltype(tₙ.f)) == u"kg * ppm"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.a)) == u"m"
  @test unit(eltype(tₒ.b)) == u"cm"
  @test unit(eltype(tₒ.c)) == u"km"
  @test all(isapprox.(tₒ.a, t.a))
  @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
  @test all(isapprox.(tₒ.c, t.c))
  @test isequal(tₒ.d, t.d)
  @test tₒ.e == t.e
  @test tₒ.f == t.f

  # selection by regular expression
  T = Unit(r"[abc]" => u"m")
  tₙ, cache = apply(T, t)
  @test unit(eltype(tₙ.a)) == u"m"
  @test unit(eltype(tₙ.b)) == u"m"
  @test unit(eltype(tₙ.c)) == u"m"
  @test unit(eltype(tₙ.d)) == u"°C"
  @test unit(eltype(tₙ.e)) == u"kg"
  @test unit(eltype(tₙ.f)) == u"kg * ppm"
  tₒ = revert(T, tₙ, cache)
  @test unit(eltype(tₒ.a)) == u"m"
  @test unit(eltype(tₒ.b)) == u"cm"
  @test unit(eltype(tₒ.c)) == u"km"
  @test all(isapprox.(tₒ.a, t.a))
  @test all(isapprox.(skipmissing(tₒ.b), skipmissing(t.b)))
  @test all(isapprox.(tₒ.c, t.c))
  @test isequal(tₒ.d, t.d)
  @test tₒ.e == t.e
  @test tₒ.f == t.f

  # error: cannot create Unit transform without arguments
  @test_throws ArgumentError Unit()
end
Loading