Merge pull request #259 from markfairbanks/tibble-class

Convert `Tibble` class to `tibble`
markfairbanks · Sep 23, 2024 · 3ea0028 · 3ea0028
2 parents af4d458 + f35d341
commit 3ea0028
Show file tree

Hide file tree

Showing 12 changed files with 316 additions and 308 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,15 @@
 
 ## v0.3.0 (in development)
 
-* Major refactor to work with `polars>=0.20.8`
+* Major refactor to work with `polars>=1.0.0`
+
+#### Functionality improvements
+
+* Convert `by` arg to `_by` to allow naming columns `by` in `.mutate()`/`.summarize()`
+
+#### New functions
+
+* `where()`
 
 ## v0.2.19
 

diff --git a/tests/test_funs.py b/tests/test_funs.py
@@ -5,14 +5,14 @@
 
 def test_abs():
     """Can get absolute value"""
-    df = tp.Tibble(x = range(-3, 0))
+    df = tp.tibble(x = range(-3, 0))
     actual = df.mutate(abs_x = tp.abs('x'), abs_col_x = tp.abs(col('x')))
-    expected = tp.Tibble(x = range(-3, 0), abs_x = range(3, 0, -1), abs_col_x = range(3, 0, -1))
+    expected = tp.tibble(x = range(-3, 0), abs_x = range(3, 0, -1), abs_col_x = range(3, 0, -1))
     assert actual.equals(expected), "abs failed"
 
 def test_agg_stats():
     """Can get aggregation statistics"""
-    df = tp.Tibble(x = range(3), y = [2, 1, 0])
+    df = tp.tibble(x = range(3), y = [2, 1, 0])
     actual = (
         df
         .summarize(
@@ -34,7 +34,7 @@ def test_agg_stats():
         )
         .mutate(tp.as_integer(cs.numeric().as_expr()))
     )
-    expected = tp.Tibble(
+    expected = tp.tibble(
         corr = [-1],
         count_x = [3], count_col_x = [3],
         cov = [-1],
@@ -55,16 +55,16 @@ def test_agg_stats():
 
 def test_case_when():
     """Can use case_when"""
-    df = tp.Tibble(x = range(1, 4))
+    df = tp.tibble(x = range(1, 4))
     actual = df.mutate(case_x = tp.case_when(col('x') < 2).then(0)
                                 .when(col('x') < 3).then(1)
                                 .otherwise(0))
-    expected = tp.Tibble(x = range(1, 4), case_x = [0, 1, 0])
+    expected = tp.tibble(x = range(1, 4), case_x = [0, 1, 0])
     assert actual.equals(expected), "case_when failed"
 
 def test_casting():
     """Can do type casting"""
-    df = tp.Tibble(int_col = [0, 0, 1], float_col = [1.0, 2.0, 3.0], chr_col = ["1", "2", "3"])
+    df = tp.tibble(int_col = [0, 0, 1], float_col = [1.0, 2.0, 3.0], chr_col = ["1", "2", "3"])
     actual = (
         df
         .mutate(float_cast = tp.as_float('int_col'),
@@ -73,70 +73,70 @@ def test_casting():
                 bool_cast = tp.as_boolean('int_col'))
         .select('float_cast', 'int_cast', 'string_cast', 'bool_cast')
     )
-    expected = tp.Tibble(float_cast = [0.0, 0.0, 1.0],
+    expected = tp.tibble(float_cast = [0.0, 0.0, 1.0],
                          int_cast = [1, 2, 3],
                          string_cast = ["0", "0", "1"],
                          bool_cast = [False, False, True])
     assert actual.equals(expected), "casting failed"
 
 def test_coalesce():
     """Can use coalesce"""
-    df = tp.Tibble(x = [None, None, 1], y = [2, None, 2], z = [3, 3, 3])
+    df = tp.tibble(x = [None, None, 1], y = [2, None, 2], z = [3, 3, 3])
     actual = (
         df
         .mutate(
             coalesce_x = tp.coalesce(col('x'), col('y'), col('z'))
         )
         .select('coalesce_x')
     )
-    expected = tp.Tibble(coalesce_x = [2, 3, 1])
+    expected = tp.tibble(coalesce_x = [2, 3, 1])
     assert actual.equals(expected), "coalesce failed"
 
 def test_floor():
     """Can get the floor"""
-    df = tp.Tibble(x = [1.1, 5.5])
+    df = tp.tibble(x = [1.1, 5.5])
     actual = df.mutate(floor_x = tp.floor('x')).select('floor_x')
-    expected = tp.Tibble(floor_x = [1.0, 5.0])
+    expected = tp.tibble(floor_x = [1.0, 5.0])
     assert actual.equals(expected), "floor failed"
 
 def test_lag():
     """Can get lagging values with function"""
-    df = tp.Tibble({'x': range(3)})
+    df = tp.tibble({'x': range(3)})
     actual = df.mutate(lag_null = tp.lag(col('x')),
                        lag_default = tp.lag('x', default = 1))
-    expected = tp.Tibble({'x': range(3),
+    expected = tp.tibble({'x': range(3),
                           'lag_null': [None, 0, 1],
                           'lag_default': [1, 0, 1]})
     assert actual.equals(expected, null_equal = True), "lag failed"
 
 def test_lead():
     """Can get leading values with function"""
-    df = tp.Tibble({'x': range(3)})
+    df = tp.tibble({'x': range(3)})
     actual = df.mutate(lead_null = tp.lead(col('x')),
                        lead_default = tp.lead('x', default = 1))
-    expected = tp.Tibble({'x': range(3),
+    expected = tp.tibble({'x': range(3),
                           'lead_null': [1, 2, None],
                           'lead_default': [1, 2, 1]})
     assert actual.equals(expected, null_equal = True), "lead failed"
 
 def test_logs():
     """Can get leading values with function"""
-    df = tp.Tibble({'x': range(1, 4)})
+    df = tp.tibble({'x': range(1, 4)})
     actual = df.mutate(log = tp.log(col('x')).round(2),
                        log10 = tp.log10('x').round(2))
     expected = df.mutate(log = col('x').log().round(2), log10 = col('x').log10().round(2))
     assert actual.equals(expected), "log failed"
 
 def test_if_else():
     """Can use if_else"""
-    df = tp.Tibble(x = range(1, 4))
+    df = tp.tibble(x = range(1, 4))
     actual = df.mutate(case_x = tp.if_else(col('x') < 2, 1, 0))
-    expected = tp.Tibble(x = range(1, 4), case_x = [1, 0, 0])
+    expected = tp.tibble(x = range(1, 4), case_x = [1, 0, 0])
     assert actual.equals(expected), "if_else failed"
 
 def test_is_predicates():
     """Can use is predicates"""
-    df = tp.Tibble(x = [0.0, 1.0, 2.0],
+    df = tp.tibble(x = [0.0, 1.0, 2.0],
                    y = [None, math.inf, math.nan])
     actual = (
         df
@@ -152,7 +152,7 @@ def test_is_predicates():
 
         )
     ).drop(['x', 'y'])
-    expected = tp.Tibble(
+    expected = tp.tibble(
         between = [False, True, True],
         is_finite = [True, True, True],
         is_in = [False, True, True],
@@ -165,7 +165,7 @@ def test_is_predicates():
     assert actual.equals(expected, null_equal = True), "is_predicates failed"
 
 def test_rep():
-    df = tp.Tibble(x = [0, 1], y = [0, 1])
+    df = tp.tibble(x = [0, 1], y = [0, 1])
     assert tp.rep(df, 2).equals(df.bind_rows(df)), "rep df failed"
     assert tp.rep(1, 2).equals(tp.Series([1, 1])), "rep int failed"
     assert tp.rep("a", 2).equals(tp.Series(["a", "a"])), "rep str failed"
@@ -174,38 +174,38 @@ def test_rep():
 
 def test_replace_null():
     """Can replace nulls"""
-    df = tp.Tibble(x = [0, None], y = [None, None])
+    df = tp.tibble(x = [0, None], y = [None, None])
     actual = df.mutate(x = tp.replace_null(col('x'), 1))
-    expected = tp.Tibble(x = [0, 1], y = [None, None])
+    expected = tp.tibble(x = [0, 1], y = [None, None])
     assert actual.equals(expected), "replace_null function failed"
 
 def test_row_number():
     """Can get row number"""
-    df = tp.Tibble(x = ['a', 'a', 'b'])
+    df = tp.tibble(x = ['a', 'a', 'b'])
     actual = df.mutate(row_num = tp.row_number())
-    expected = tp.Tibble(x = ['a', 'a', 'b'], row_num = [1, 2, 3])
+    expected = tp.tibble(x = ['a', 'a', 'b'], row_num = [1, 2, 3])
     assert actual.equals(expected), "row_number failed"
 
 def test_row_number_group():
     """Can get row number by group"""
-    df = tp.Tibble(x = ['a', 'a', 'b'])
+    df = tp.tibble(x = ['a', 'a', 'b'])
     actual = (
         df.mutate(group_row_num = tp.row_number(), _by = 'x')
         .arrange('x', 'group_row_num')
     )
-    expected = tp.Tibble(x = ['a', 'a', 'b'], group_row_num = [1, 2, 1])
+    expected = tp.tibble(x = ['a', 'a', 'b'], group_row_num = [1, 2, 1])
     assert actual.equals(expected), "group row_number failed"
 
 def test_round():
     """Can round values"""
-    df = tp.Tibble(x = [1.11, 2.22, 3.33])
+    df = tp.tibble(x = [1.11, 2.22, 3.33])
     actual = df.mutate(x = tp.round(col('x'), 1))
-    expected = tp.Tibble(x = [1.1, 2.2, 3.3])
+    expected = tp.tibble(x = [1.1, 2.2, 3.3])
     assert actual.equals(expected), "round failed"
 
 def test_sqrt():
     """Can get the square root"""
-    df = tp.Tibble(x = [9, 25, 100])
+    df = tp.tibble(x = [9, 25, 100])
     actual = df.mutate(x = tp.sqrt('x'))
-    expected = tp.Tibble(x = [3, 5, 10])
+    expected = tp.tibble(x = [3, 5, 10])
     assert actual.equals(expected), "sqrt failed"
diff --git a/tests/test_groupby.py b/tests/test_groupby.py
@@ -3,65 +3,65 @@
 
 def test_group_filter():
     """Can filter by group"""
-    df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
+    df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
     actual = (
         df.filter(col('x') <= col('x').mean(),
                   _by = 'y')
         .arrange('y')
     )
-    expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']})
+    expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']})
     assert actual.equals(expected), "group filter failed"
 
 def test_group_mutate():
     """Can mutate by group"""
-    df = tp.Tibble({'x': range(2), 'y': ['a', 'b']})
+    df = tp.tibble({'x': range(2), 'y': ['a', 'b']})
     actual = (
         df.mutate(avg_x = col('x').mean(),
                   _by = 'y')
         .arrange('y')
     )
-    expected = tp.Tibble({'x': [0, 1], 'y': ['a', 'b'], 'avg_x': [0, 1]})
+    expected = tp.tibble({'x': [0, 1], 'y': ['a', 'b'], 'avg_x': [0, 1]})
     assert actual.equals(expected), "group mutate failed"
 
 def test_group_slice():
     """Can slice by group"""
-    df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
+    df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
     actual = df.slice(0, _by = 'y').arrange('y')
-    expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']})
+    expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']})
     assert actual.equals(expected), "group slice failed"
 
 def test_group_slice_head():
     """Can slice_head by group"""
-    df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
+    df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
     actual = df.slice_head(1, _by = 'y').arrange('y')
-    expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']})
+    expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']})
     assert actual.equals(expected), "group slice_head failed"
 
 def test_group_slice_tail():
     """Can slice_tail by group"""
-    df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
+    df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
     actual = df.slice_tail(1, _by = 'y').arrange('y')
-    expected = tp.Tibble({'x': [1, 2], 'y': ['a', 'b']})
+    expected = tp.tibble({'x': [1, 2], 'y': ['a', 'b']})
     assert actual.equals(expected), "group slice_tail failed"
 
 def test_group_summarize():
     """Can summarize by group"""
-    df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']})
+    df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']})
     actual = df.summarize(avg_x = col('x').mean(), _by = col('y')).arrange('y')
-    expected = tp.Tibble({'y': ['a', 'b'], 'avg_x': [0.5, 2]})
+    expected = tp.tibble({'y': ['a', 'b'], 'avg_x': [0.5, 2]})
     assert actual.equals(expected), "group summarize failed"
 
 def test_group_summarize_across():
     """Can summarize across by group"""
-    df = tp.Tibble({'x': range(3), 'y': range(3, 6), 'z': ['a', 'a', 'b']})
+    df = tp.tibble({'x': range(3), 'y': range(3, 6), 'z': ['a', 'a', 'b']})
     actual = (
         df
         .summarize(col(['x', 'y']).max().name.prefix('max_'),
                    avg_x = col('x').mean(),
                    _by = [col('z')])
         .arrange('z')
     )
-    expected = tp.Tibble({'z': ['a', 'b'],
+    expected = tp.tibble({'z': ['a', 'b'],
                           'max_x': [1, 2],
                           'max_y': [4, 5],
                           'avg_x': [0.5, 2]})

diff --git a/tests/test_lubridate.py b/tests/test_lubridate.py
@@ -3,7 +3,7 @@
 
 def test_date():
     """Can do date operations"""
-    df = tp.Tibble(x = ['2021-01-01', '2021-10-01']).mutate(date = col('x').str.strptime(tp.Date))
+    df = tp.tibble(x = ['2021-01-01', '2021-10-01']).mutate(date = col('x').str.strptime(tp.Date))
     actual = (
         df
         .mutate(date_check = tp.as_date('x'),
@@ -16,7 +16,7 @@ def test_date():
         )
     )
     expected = (
-        tp.Tibble(x = ['2021-01-01', '2021-10-01'])
+        tp.tibble(x = ['2021-01-01', '2021-10-01'])
         .mutate(date = col('x').str.strptime(tp.Date))
         .mutate(date_check = col('date'),
                 mday = col('date').dt.day(),
@@ -31,11 +31,11 @@ def test_date():
 
 def test_as_date_format():
     """Can pass fmt to as_date"""
-    df = tp.Tibble(date = ['12/31/2021'])
+    df = tp.tibble(date = ['12/31/2021'])
     out = df.mutate(date_parsed = tp.as_date(col('date'), format = '%m/%d/%Y'))
     assert out.pull().dtype == tp.Date, "as_date format failed"
 
 def test_make_date():
-    df = tp.Tibble(date = ['2021-12-1']).mutate(date = tp.as_date('date'))
+    df = tp.tibble(date = ['2021-12-1']).mutate(date = tp.as_date('date'))
     out = df.mutate(date = tp.make_date(2021, 12, 1))
     assert df.pull('date').equals(out.pull('date')), "make_date failed"