Skip to content

Commit

Permalink
Merge pull request #259 from markfairbanks/tibble-class
Browse files Browse the repository at this point in the history
Convert `Tibble` class to `tibble`
  • Loading branch information
markfairbanks authored Sep 23, 2024
2 parents af4d458 + f35d341 commit 3ea0028
Show file tree
Hide file tree
Showing 12 changed files with 316 additions and 308 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@

## v0.3.0 (in development)

* Major refactor to work with `polars>=0.20.8`
* Major refactor to work with `polars>=1.0.0`

#### Functionality improvements

* Convert `by` arg to `_by` to allow naming columns `by` in `.mutate()`/`.summarize()`

#### New functions

* `where()`

## v0.2.19

Expand Down
64 changes: 32 additions & 32 deletions tests/test_funs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

def test_abs():
"""Can get absolute value"""
df = tp.Tibble(x = range(-3, 0))
df = tp.tibble(x = range(-3, 0))
actual = df.mutate(abs_x = tp.abs('x'), abs_col_x = tp.abs(col('x')))
expected = tp.Tibble(x = range(-3, 0), abs_x = range(3, 0, -1), abs_col_x = range(3, 0, -1))
expected = tp.tibble(x = range(-3, 0), abs_x = range(3, 0, -1), abs_col_x = range(3, 0, -1))
assert actual.equals(expected), "abs failed"

def test_agg_stats():
"""Can get aggregation statistics"""
df = tp.Tibble(x = range(3), y = [2, 1, 0])
df = tp.tibble(x = range(3), y = [2, 1, 0])
actual = (
df
.summarize(
Expand All @@ -34,7 +34,7 @@ def test_agg_stats():
)
.mutate(tp.as_integer(cs.numeric().as_expr()))
)
expected = tp.Tibble(
expected = tp.tibble(
corr = [-1],
count_x = [3], count_col_x = [3],
cov = [-1],
Expand All @@ -55,16 +55,16 @@ def test_agg_stats():

def test_case_when():
"""Can use case_when"""
df = tp.Tibble(x = range(1, 4))
df = tp.tibble(x = range(1, 4))
actual = df.mutate(case_x = tp.case_when(col('x') < 2).then(0)
.when(col('x') < 3).then(1)
.otherwise(0))
expected = tp.Tibble(x = range(1, 4), case_x = [0, 1, 0])
expected = tp.tibble(x = range(1, 4), case_x = [0, 1, 0])
assert actual.equals(expected), "case_when failed"

def test_casting():
"""Can do type casting"""
df = tp.Tibble(int_col = [0, 0, 1], float_col = [1.0, 2.0, 3.0], chr_col = ["1", "2", "3"])
df = tp.tibble(int_col = [0, 0, 1], float_col = [1.0, 2.0, 3.0], chr_col = ["1", "2", "3"])
actual = (
df
.mutate(float_cast = tp.as_float('int_col'),
Expand All @@ -73,70 +73,70 @@ def test_casting():
bool_cast = tp.as_boolean('int_col'))
.select('float_cast', 'int_cast', 'string_cast', 'bool_cast')
)
expected = tp.Tibble(float_cast = [0.0, 0.0, 1.0],
expected = tp.tibble(float_cast = [0.0, 0.0, 1.0],
int_cast = [1, 2, 3],
string_cast = ["0", "0", "1"],
bool_cast = [False, False, True])
assert actual.equals(expected), "casting failed"

def test_coalesce():
"""Can use coalesce"""
df = tp.Tibble(x = [None, None, 1], y = [2, None, 2], z = [3, 3, 3])
df = tp.tibble(x = [None, None, 1], y = [2, None, 2], z = [3, 3, 3])
actual = (
df
.mutate(
coalesce_x = tp.coalesce(col('x'), col('y'), col('z'))
)
.select('coalesce_x')
)
expected = tp.Tibble(coalesce_x = [2, 3, 1])
expected = tp.tibble(coalesce_x = [2, 3, 1])
assert actual.equals(expected), "coalesce failed"

def test_floor():
"""Can get the floor"""
df = tp.Tibble(x = [1.1, 5.5])
df = tp.tibble(x = [1.1, 5.5])
actual = df.mutate(floor_x = tp.floor('x')).select('floor_x')
expected = tp.Tibble(floor_x = [1.0, 5.0])
expected = tp.tibble(floor_x = [1.0, 5.0])
assert actual.equals(expected), "floor failed"

def test_lag():
"""Can get lagging values with function"""
df = tp.Tibble({'x': range(3)})
df = tp.tibble({'x': range(3)})
actual = df.mutate(lag_null = tp.lag(col('x')),
lag_default = tp.lag('x', default = 1))
expected = tp.Tibble({'x': range(3),
expected = tp.tibble({'x': range(3),
'lag_null': [None, 0, 1],
'lag_default': [1, 0, 1]})
assert actual.equals(expected, null_equal = True), "lag failed"

def test_lead():
"""Can get leading values with function"""
df = tp.Tibble({'x': range(3)})
df = tp.tibble({'x': range(3)})
actual = df.mutate(lead_null = tp.lead(col('x')),
lead_default = tp.lead('x', default = 1))
expected = tp.Tibble({'x': range(3),
expected = tp.tibble({'x': range(3),
'lead_null': [1, 2, None],
'lead_default': [1, 2, 1]})
assert actual.equals(expected, null_equal = True), "lead failed"

def test_logs():
"""Can get leading values with function"""
df = tp.Tibble({'x': range(1, 4)})
df = tp.tibble({'x': range(1, 4)})
actual = df.mutate(log = tp.log(col('x')).round(2),
log10 = tp.log10('x').round(2))
expected = df.mutate(log = col('x').log().round(2), log10 = col('x').log10().round(2))
assert actual.equals(expected), "log failed"

def test_if_else():
"""Can use if_else"""
df = tp.Tibble(x = range(1, 4))
df = tp.tibble(x = range(1, 4))
actual = df.mutate(case_x = tp.if_else(col('x') < 2, 1, 0))
expected = tp.Tibble(x = range(1, 4), case_x = [1, 0, 0])
expected = tp.tibble(x = range(1, 4), case_x = [1, 0, 0])
assert actual.equals(expected), "if_else failed"

def test_is_predicates():
"""Can use is predicates"""
df = tp.Tibble(x = [0.0, 1.0, 2.0],
df = tp.tibble(x = [0.0, 1.0, 2.0],
y = [None, math.inf, math.nan])
actual = (
df
Expand All @@ -152,7 +152,7 @@ def test_is_predicates():

)
).drop(['x', 'y'])
expected = tp.Tibble(
expected = tp.tibble(
between = [False, True, True],
is_finite = [True, True, True],
is_in = [False, True, True],
Expand All @@ -165,7 +165,7 @@ def test_is_predicates():
assert actual.equals(expected, null_equal = True), "is_predicates failed"

def test_rep():
df = tp.Tibble(x = [0, 1], y = [0, 1])
df = tp.tibble(x = [0, 1], y = [0, 1])
assert tp.rep(df, 2).equals(df.bind_rows(df)), "rep df failed"
assert tp.rep(1, 2).equals(tp.Series([1, 1])), "rep int failed"
assert tp.rep("a", 2).equals(tp.Series(["a", "a"])), "rep str failed"
Expand All @@ -174,38 +174,38 @@ def test_rep():

def test_replace_null():
"""Can replace nulls"""
df = tp.Tibble(x = [0, None], y = [None, None])
df = tp.tibble(x = [0, None], y = [None, None])
actual = df.mutate(x = tp.replace_null(col('x'), 1))
expected = tp.Tibble(x = [0, 1], y = [None, None])
expected = tp.tibble(x = [0, 1], y = [None, None])
assert actual.equals(expected), "replace_null function failed"

def test_row_number():
"""Can get row number"""
df = tp.Tibble(x = ['a', 'a', 'b'])
df = tp.tibble(x = ['a', 'a', 'b'])
actual = df.mutate(row_num = tp.row_number())
expected = tp.Tibble(x = ['a', 'a', 'b'], row_num = [1, 2, 3])
expected = tp.tibble(x = ['a', 'a', 'b'], row_num = [1, 2, 3])
assert actual.equals(expected), "row_number failed"

def test_row_number_group():
"""Can get row number by group"""
df = tp.Tibble(x = ['a', 'a', 'b'])
df = tp.tibble(x = ['a', 'a', 'b'])
actual = (
df.mutate(group_row_num = tp.row_number(), _by = 'x')
.arrange('x', 'group_row_num')
)
expected = tp.Tibble(x = ['a', 'a', 'b'], group_row_num = [1, 2, 1])
expected = tp.tibble(x = ['a', 'a', 'b'], group_row_num = [1, 2, 1])
assert actual.equals(expected), "group row_number failed"

def test_round():
"""Can round values"""
df = tp.Tibble(x = [1.11, 2.22, 3.33])
df = tp.tibble(x = [1.11, 2.22, 3.33])
actual = df.mutate(x = tp.round(col('x'), 1))
expected = tp.Tibble(x = [1.1, 2.2, 3.3])
expected = tp.tibble(x = [1.1, 2.2, 3.3])
assert actual.equals(expected), "round failed"

def test_sqrt():
"""Can get the square root"""
df = tp.Tibble(x = [9, 25, 100])
df = tp.tibble(x = [9, 25, 100])
actual = df.mutate(x = tp.sqrt('x'))
expected = tp.Tibble(x = [3, 5, 10])
expected = tp.tibble(x = [3, 5, 10])
assert actual.equals(expected), "sqrt failed"
28 changes: 14 additions & 14 deletions tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,65 +3,65 @@

def test_group_filter():
"""Can filter by group"""
df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
actual = (
df.filter(col('x') <= col('x').mean(),
_by = 'y')
.arrange('y')
)
expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']})
expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']})
assert actual.equals(expected), "group filter failed"

def test_group_mutate():
"""Can mutate by group"""
df = tp.Tibble({'x': range(2), 'y': ['a', 'b']})
df = tp.tibble({'x': range(2), 'y': ['a', 'b']})
actual = (
df.mutate(avg_x = col('x').mean(),
_by = 'y')
.arrange('y')
)
expected = tp.Tibble({'x': [0, 1], 'y': ['a', 'b'], 'avg_x': [0, 1]})
expected = tp.tibble({'x': [0, 1], 'y': ['a', 'b'], 'avg_x': [0, 1]})
assert actual.equals(expected), "group mutate failed"

def test_group_slice():
"""Can slice by group"""
df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
actual = df.slice(0, _by = 'y').arrange('y')
expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']})
expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']})
assert actual.equals(expected), "group slice failed"

def test_group_slice_head():
"""Can slice_head by group"""
df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
actual = df.slice_head(1, _by = 'y').arrange('y')
expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']})
expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']})
assert actual.equals(expected), "group slice_head failed"

def test_group_slice_tail():
"""Can slice_tail by group"""
df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
actual = df.slice_tail(1, _by = 'y').arrange('y')
expected = tp.Tibble({'x': [1, 2], 'y': ['a', 'b']})
expected = tp.tibble({'x': [1, 2], 'y': ['a', 'b']})
assert actual.equals(expected), "group slice_tail failed"

def test_group_summarize():
"""Can summarize by group"""
df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
actual = df.summarize(avg_x = col('x').mean(), _by = col('y')).arrange('y')
expected = tp.Tibble({'y': ['a', 'b'], 'avg_x': [0.5, 2]})
expected = tp.tibble({'y': ['a', 'b'], 'avg_x': [0.5, 2]})
assert actual.equals(expected), "group summarize failed"

def test_group_summarize_across():
"""Can summarize across by group"""
df = tp.Tibble({'x': range(3), 'y': range(3, 6), 'z': ['a', 'a', 'b']})
df = tp.tibble({'x': range(3), 'y': range(3, 6), 'z': ['a', 'a', 'b']})
actual = (
df
.summarize(col(['x', 'y']).max().name.prefix('max_'),
avg_x = col('x').mean(),
_by = [col('z')])
.arrange('z')
)
expected = tp.Tibble({'z': ['a', 'b'],
expected = tp.tibble({'z': ['a', 'b'],
'max_x': [1, 2],
'max_y': [4, 5],
'avg_x': [0.5, 2]})
Expand Down
8 changes: 4 additions & 4 deletions tests/test_lubridate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

def test_date():
"""Can do date operations"""
df = tp.Tibble(x = ['2021-01-01', '2021-10-01']).mutate(date = col('x').str.strptime(tp.Date))
df = tp.tibble(x = ['2021-01-01', '2021-10-01']).mutate(date = col('x').str.strptime(tp.Date))
actual = (
df
.mutate(date_check = tp.as_date('x'),
Expand All @@ -16,7 +16,7 @@ def test_date():
)
)
expected = (
tp.Tibble(x = ['2021-01-01', '2021-10-01'])
tp.tibble(x = ['2021-01-01', '2021-10-01'])
.mutate(date = col('x').str.strptime(tp.Date))
.mutate(date_check = col('date'),
mday = col('date').dt.day(),
Expand All @@ -31,11 +31,11 @@ def test_date():

def test_as_date_format():
"""Can pass fmt to as_date"""
df = tp.Tibble(date = ['12/31/2021'])
df = tp.tibble(date = ['12/31/2021'])
out = df.mutate(date_parsed = tp.as_date(col('date'), format = '%m/%d/%Y'))
assert out.pull().dtype == tp.Date, "as_date format failed"

def test_make_date():
df = tp.Tibble(date = ['2021-12-1']).mutate(date = tp.as_date('date'))
df = tp.tibble(date = ['2021-12-1']).mutate(date = tp.as_date('date'))
out = df.mutate(date = tp.make_date(2021, 12, 1))
assert df.pull('date').equals(out.pull('date')), "make_date failed"
Loading

0 comments on commit 3ea0028

Please sign in to comment.