diff --git a/CHANGELOG.md b/CHANGELOG.md index 1996c3c..3063dbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,15 @@ ## v0.3.0 (in development) -* Major refactor to work with `polars>=0.20.8` +* Major refactor to work with `polars>=1.0.0` + +#### Functionality improvements + +* Convert `by` arg to `_by` to allow naming columns `by` in `.mutate()`/`.summarize()` + +#### New functions + +* `where()` ## v0.2.19 diff --git a/tests/test_funs.py b/tests/test_funs.py index 4a69f26..d7f394b 100644 --- a/tests/test_funs.py +++ b/tests/test_funs.py @@ -5,14 +5,14 @@ def test_abs(): """Can get absolute value""" - df = tp.Tibble(x = range(-3, 0)) + df = tp.tibble(x = range(-3, 0)) actual = df.mutate(abs_x = tp.abs('x'), abs_col_x = tp.abs(col('x'))) - expected = tp.Tibble(x = range(-3, 0), abs_x = range(3, 0, -1), abs_col_x = range(3, 0, -1)) + expected = tp.tibble(x = range(-3, 0), abs_x = range(3, 0, -1), abs_col_x = range(3, 0, -1)) assert actual.equals(expected), "abs failed" def test_agg_stats(): """Can get aggregation statistics""" - df = tp.Tibble(x = range(3), y = [2, 1, 0]) + df = tp.tibble(x = range(3), y = [2, 1, 0]) actual = ( df .summarize( @@ -34,7 +34,7 @@ def test_agg_stats(): ) .mutate(tp.as_integer(cs.numeric().as_expr())) ) - expected = tp.Tibble( + expected = tp.tibble( corr = [-1], count_x = [3], count_col_x = [3], cov = [-1], @@ -55,16 +55,16 @@ def test_agg_stats(): def test_case_when(): """Can use case_when""" - df = tp.Tibble(x = range(1, 4)) + df = tp.tibble(x = range(1, 4)) actual = df.mutate(case_x = tp.case_when(col('x') < 2).then(0) .when(col('x') < 3).then(1) .otherwise(0)) - expected = tp.Tibble(x = range(1, 4), case_x = [0, 1, 0]) + expected = tp.tibble(x = range(1, 4), case_x = [0, 1, 0]) assert actual.equals(expected), "case_when failed" def test_casting(): """Can do type casting""" - df = tp.Tibble(int_col = [0, 0, 1], float_col = [1.0, 2.0, 3.0], chr_col = ["1", "2", "3"]) + df = tp.tibble(int_col = [0, 0, 1], float_col = [1.0, 2.0, 
3.0], chr_col = ["1", "2", "3"]) actual = ( df .mutate(float_cast = tp.as_float('int_col'), @@ -73,7 +73,7 @@ def test_casting(): bool_cast = tp.as_boolean('int_col')) .select('float_cast', 'int_cast', 'string_cast', 'bool_cast') ) - expected = tp.Tibble(float_cast = [0.0, 0.0, 1.0], + expected = tp.tibble(float_cast = [0.0, 0.0, 1.0], int_cast = [1, 2, 3], string_cast = ["0", "0", "1"], bool_cast = [False, False, True]) @@ -81,7 +81,7 @@ def test_casting(): def test_coalesce(): """Can use coalesce""" - df = tp.Tibble(x = [None, None, 1], y = [2, None, 2], z = [3, 3, 3]) + df = tp.tibble(x = [None, None, 1], y = [2, None, 2], z = [3, 3, 3]) actual = ( df .mutate( @@ -89,39 +89,39 @@ def test_coalesce(): ) .select('coalesce_x') ) - expected = tp.Tibble(coalesce_x = [2, 3, 1]) + expected = tp.tibble(coalesce_x = [2, 3, 1]) assert actual.equals(expected), "coalesce failed" def test_floor(): """Can get the floor""" - df = tp.Tibble(x = [1.1, 5.5]) + df = tp.tibble(x = [1.1, 5.5]) actual = df.mutate(floor_x = tp.floor('x')).select('floor_x') - expected = tp.Tibble(floor_x = [1.0, 5.0]) + expected = tp.tibble(floor_x = [1.0, 5.0]) assert actual.equals(expected), "floor failed" def test_lag(): """Can get lagging values with function""" - df = tp.Tibble({'x': range(3)}) + df = tp.tibble({'x': range(3)}) actual = df.mutate(lag_null = tp.lag(col('x')), lag_default = tp.lag('x', default = 1)) - expected = tp.Tibble({'x': range(3), + expected = tp.tibble({'x': range(3), 'lag_null': [None, 0, 1], 'lag_default': [1, 0, 1]}) assert actual.equals(expected, null_equal = True), "lag failed" def test_lead(): """Can get leading values with function""" - df = tp.Tibble({'x': range(3)}) + df = tp.tibble({'x': range(3)}) actual = df.mutate(lead_null = tp.lead(col('x')), lead_default = tp.lead('x', default = 1)) - expected = tp.Tibble({'x': range(3), + expected = tp.tibble({'x': range(3), 'lead_null': [1, 2, None], 'lead_default': [1, 2, 1]}) assert actual.equals(expected, null_equal = 
True), "lead failed" def test_logs(): """Can get leading values with function""" - df = tp.Tibble({'x': range(1, 4)}) + df = tp.tibble({'x': range(1, 4)}) actual = df.mutate(log = tp.log(col('x')).round(2), log10 = tp.log10('x').round(2)) expected = df.mutate(log = col('x').log().round(2), log10 = col('x').log10().round(2)) @@ -129,14 +129,14 @@ def test_logs(): def test_if_else(): """Can use if_else""" - df = tp.Tibble(x = range(1, 4)) + df = tp.tibble(x = range(1, 4)) actual = df.mutate(case_x = tp.if_else(col('x') < 2, 1, 0)) - expected = tp.Tibble(x = range(1, 4), case_x = [1, 0, 0]) + expected = tp.tibble(x = range(1, 4), case_x = [1, 0, 0]) assert actual.equals(expected), "if_else failed" def test_is_predicates(): """Can use is predicates""" - df = tp.Tibble(x = [0.0, 1.0, 2.0], + df = tp.tibble(x = [0.0, 1.0, 2.0], y = [None, math.inf, math.nan]) actual = ( df @@ -152,7 +152,7 @@ def test_is_predicates(): ) ).drop(['x', 'y']) - expected = tp.Tibble( + expected = tp.tibble( between = [False, True, True], is_finite = [True, True, True], is_in = [False, True, True], @@ -165,7 +165,7 @@ def test_is_predicates(): assert actual.equals(expected, null_equal = True), "is_predicates failed" def test_rep(): - df = tp.Tibble(x = [0, 1], y = [0, 1]) + df = tp.tibble(x = [0, 1], y = [0, 1]) assert tp.rep(df, 2).equals(df.bind_rows(df)), "rep df failed" assert tp.rep(1, 2).equals(tp.Series([1, 1])), "rep int failed" assert tp.rep("a", 2).equals(tp.Series(["a", "a"])), "rep str failed" @@ -174,38 +174,38 @@ def test_rep(): def test_replace_null(): """Can replace nulls""" - df = tp.Tibble(x = [0, None], y = [None, None]) + df = tp.tibble(x = [0, None], y = [None, None]) actual = df.mutate(x = tp.replace_null(col('x'), 1)) - expected = tp.Tibble(x = [0, 1], y = [None, None]) + expected = tp.tibble(x = [0, 1], y = [None, None]) assert actual.equals(expected), "replace_null function failed" def test_row_number(): """Can get row number""" - df = tp.Tibble(x = ['a', 'a', 'b']) + 
df = tp.tibble(x = ['a', 'a', 'b']) actual = df.mutate(row_num = tp.row_number()) - expected = tp.Tibble(x = ['a', 'a', 'b'], row_num = [1, 2, 3]) + expected = tp.tibble(x = ['a', 'a', 'b'], row_num = [1, 2, 3]) assert actual.equals(expected), "row_number failed" def test_row_number_group(): """Can get row number by group""" - df = tp.Tibble(x = ['a', 'a', 'b']) + df = tp.tibble(x = ['a', 'a', 'b']) actual = ( df.mutate(group_row_num = tp.row_number(), _by = 'x') .arrange('x', 'group_row_num') ) - expected = tp.Tibble(x = ['a', 'a', 'b'], group_row_num = [1, 2, 1]) + expected = tp.tibble(x = ['a', 'a', 'b'], group_row_num = [1, 2, 1]) assert actual.equals(expected), "group row_number failed" def test_round(): """Can round values""" - df = tp.Tibble(x = [1.11, 2.22, 3.33]) + df = tp.tibble(x = [1.11, 2.22, 3.33]) actual = df.mutate(x = tp.round(col('x'), 1)) - expected = tp.Tibble(x = [1.1, 2.2, 3.3]) + expected = tp.tibble(x = [1.1, 2.2, 3.3]) assert actual.equals(expected), "round failed" def test_sqrt(): """Can get the square root""" - df = tp.Tibble(x = [9, 25, 100]) + df = tp.tibble(x = [9, 25, 100]) actual = df.mutate(x = tp.sqrt('x')) - expected = tp.Tibble(x = [3, 5, 10]) + expected = tp.tibble(x = [3, 5, 10]) assert actual.equals(expected), "sqrt failed" \ No newline at end of file diff --git a/tests/test_groupby.py b/tests/test_groupby.py index 7ee961a..6fa3239 100644 --- a/tests/test_groupby.py +++ b/tests/test_groupby.py @@ -3,57 +3,57 @@ def test_group_filter(): """Can filter by group""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = ( df.filter(col('x') <= col('x').mean(), _by = 'y') .arrange('y') ) - expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']}) + expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']}) assert actual.equals(expected), "group filter failed" def test_group_mutate(): """Can mutate by group""" - df = tp.Tibble({'x': range(2), 'y': ['a', 'b']}) + df = 
tp.tibble({'x': range(2), 'y': ['a', 'b']}) actual = ( df.mutate(avg_x = col('x').mean(), _by = 'y') .arrange('y') ) - expected = tp.Tibble({'x': [0, 1], 'y': ['a', 'b'], 'avg_x': [0, 1]}) + expected = tp.tibble({'x': [0, 1], 'y': ['a', 'b'], 'avg_x': [0, 1]}) assert actual.equals(expected), "group mutate failed" def test_group_slice(): """Can slice by group""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.slice(0, _by = 'y').arrange('y') - expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']}) + expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']}) assert actual.equals(expected), "group slice failed" def test_group_slice_head(): """Can slice_head by group""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.slice_head(1, _by = 'y').arrange('y') - expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']}) + expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']}) assert actual.equals(expected), "group slice_head failed" def test_group_slice_tail(): """Can slice_tail by group""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.slice_tail(1, _by = 'y').arrange('y') - expected = tp.Tibble({'x': [1, 2], 'y': ['a', 'b']}) + expected = tp.tibble({'x': [1, 2], 'y': ['a', 'b']}) assert actual.equals(expected), "group slice_tail failed" def test_group_summarize(): """Can summarize by group""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.summarize(avg_x = col('x').mean(), _by = col('y')).arrange('y') - expected = tp.Tibble({'y': ['a', 'b'], 'avg_x': [0.5, 2]}) + expected = tp.tibble({'y': ['a', 'b'], 'avg_x': [0.5, 2]}) assert actual.equals(expected), "group summarize failed" def test_group_summarize_across(): """Can summarize across by group""" - df = tp.Tibble({'x': range(3), 'y': 
range(3, 6), 'z': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': range(3, 6), 'z': ['a', 'a', 'b']}) actual = ( df .summarize(col(['x', 'y']).max().name.prefix('max_'), @@ -61,7 +61,7 @@ def test_group_summarize_across(): _by = [col('z')]) .arrange('z') ) - expected = tp.Tibble({'z': ['a', 'b'], + expected = tp.tibble({'z': ['a', 'b'], 'max_x': [1, 2], 'max_y': [4, 5], 'avg_x': [0.5, 2]}) diff --git a/tests/test_lubridate.py b/tests/test_lubridate.py index e60e44c..4137f80 100644 --- a/tests/test_lubridate.py +++ b/tests/test_lubridate.py @@ -3,7 +3,7 @@ def test_date(): """Can do date operations""" - df = tp.Tibble(x = ['2021-01-01', '2021-10-01']).mutate(date = col('x').str.strptime(tp.Date)) + df = tp.tibble(x = ['2021-01-01', '2021-10-01']).mutate(date = col('x').str.strptime(tp.Date)) actual = ( df .mutate(date_check = tp.as_date('x'), @@ -16,7 +16,7 @@ def test_date(): ) ) expected = ( - tp.Tibble(x = ['2021-01-01', '2021-10-01']) + tp.tibble(x = ['2021-01-01', '2021-10-01']) .mutate(date = col('x').str.strptime(tp.Date)) .mutate(date_check = col('date'), mday = col('date').dt.day(), @@ -31,11 +31,11 @@ def test_date(): def test_as_date_format(): """Can pass fmt to as_date""" - df = tp.Tibble(date = ['12/31/2021']) + df = tp.tibble(date = ['12/31/2021']) out = df.mutate(date_parsed = tp.as_date(col('date'), format = '%m/%d/%Y')) assert out.pull().dtype == tp.Date, "as_date format failed" def test_make_date(): - df = tp.Tibble(date = ['2021-12-1']).mutate(date = tp.as_date('date')) + df = tp.tibble(date = ['2021-12-1']).mutate(date = tp.as_date('date')) out = df.mutate(date = tp.make_date(2021, 12, 1)) assert df.pull('date').equals(out.pull('date')), "make_date failed" \ No newline at end of file diff --git a/tests/test_stringr.py b/tests/test_stringr.py index 44c87ab..abbc635 100644 --- a/tests/test_stringr.py +++ b/tests/test_stringr.py @@ -3,135 +3,135 @@ def test_paste(): """Can use paste""" - df = tp.Tibble(x = ['a', 'b', 'c']) + df = tp.tibble(x 
= ['a', 'b', 'c']) actual = df.mutate(x_end = tp.paste(col('x'), 'end', sep = '_')) - expected = tp.Tibble(x = ['a', 'b', 'c'], x_end = ['a_end', 'b_end', 'c_end']) + expected = tp.tibble(x = ['a', 'b', 'c'], x_end = ['a_end', 'b_end', 'c_end']) assert actual.equals(expected), "paste failed" def test_paste0(): """Can use paste0""" - df = tp.Tibble(x = ['a', 'b', 'c']) + df = tp.tibble(x = ['a', 'b', 'c']) actual = df.mutate(x_end = tp.paste0(col('x'), '_end')) - expected = tp.Tibble(x = ['a', 'b', 'c'], x_end = ['a_end', 'b_end', 'c_end']) + expected = tp.tibble(x = ['a', 'b', 'c'], x_end = ['a_end', 'b_end', 'c_end']) assert actual.equals(expected), "paste0 failed" def test_str_c(): """Can use str_c""" - df = tp.Tibble(x = ['a', 'b', 'c']) + df = tp.tibble(x = ['a', 'b', 'c']) actual = df.mutate(x_end = tp.str_c(col('x'), 'end', sep = '_')) - expected = tp.Tibble(x = ['a', 'b', 'c'], x_end = ['a_end', 'b_end', 'c_end']) + expected = tp.tibble(x = ['a', 'b', 'c'], x_end = ['a_end', 'b_end', 'c_end']) assert actual.equals(expected), "str_c failed" def test_str_detect_single(): """Can str_detect find a single string""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(x = tp.str_detect('name', 'a'), y = tp.str_detect('name', 'a', negate=True)) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], x = [True, True, True, True], y = [False, False, False, False]) assert actual.equals(expected), "str_detect single failed" def test_str_detect_multiple(): """Can str_detect find multiple strings""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(x = tp.str_detect('name', ['a', 'e']), y = tp.str_detect('name', ['a', 'e'], negate=True)) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], + 
expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], x = [True, False, True, True], y = [False, True, False, False]) assert actual.equals(expected), "str_detect multiple failed" def test_str_ends(): """Can use str_end""" - df = tp.Tibble(words = ['apple', 'bear', 'amazing']) + df = tp.tibble(words = ['apple', 'bear', 'amazing']) actual = df.filter(tp.str_ends(col('words'), 'ing')) - expected = tp.Tibble(words = ['amazing']) + expected = tp.tibble(words = ['amazing']) assert actual.equals(expected), "str_ends failed" def test_str_extract(): """Can str_extract extract strings""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(x = tp.str_extract('name', 'pp')) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], x = ['pp', None, None, None]) assert actual.equals(expected), "str_extract failed" def test_str_length(): """Can str_length count strings""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(x = tp.str_length('name')) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], x = [5, 6, 4, 5]) assert actual.equals(expected), "str_length failed" def test_str_sub(): """Can str_sub can extract strings""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(x = tp.str_sub('name', 0, 3)) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], x = ['app', 'ban', 'pea', 'gra']) assert actual.equals(expected), "str_sub failed" def test_str_remove_all(): """Can str_remove_all find all strings and remove""" - df = tp.Tibble(name = ['apple', 
'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(new_name = tp.str_remove_all(tp.col('name'), 'a')) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['pple', 'bnn', 'per', 'grpe']) + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['pple', 'bnn', 'per', 'grpe']) assert actual.equals(expected), "str_remove_all failed" def test_str_remove(): """Can str_remove finds first instance of string and remove""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(new_name = tp.str_remove(tp.col('name'), 'a')) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['pple', 'bnana', 'per', 'grpe']) + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['pple', 'bnana', 'per', 'grpe']) assert actual.equals(expected), "str_remove failed" def test_str_replace_all(): """Can str_replace_all find all strings and replace""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(new_name = tp.str_replace_all(tp.col('name'), 'a', 'A')) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['Apple', 'bAnAnA', 'peAr', 'grApe']) + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['Apple', 'bAnAnA', 'peAr', 'grApe']) assert actual.equals(expected), "str_replace_all failed" def test_str_replace(): """Can str_replace finds first instance of string and replace""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(new_name = tp.str_replace(tp.col('name'), 'a', 'A')) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape'], new_name = ['Apple', 'bAnana', 'peAr', 'grApe']) + expected = tp.tibble(name = 
['apple', 'banana', 'pear', 'grape'], new_name = ['Apple', 'bAnana', 'peAr', 'grApe']) assert actual.equals(expected), "str_replace failed" def test_str_starts(): """Can use str_starts""" - df = tp.Tibble(words = ['apple', 'bear', 'amazing']) + df = tp.tibble(words = ['apple', 'bear', 'amazing']) actual = df.filter(tp.str_starts(col('words'), 'a')) - expected = tp.Tibble(words = ['apple', 'amazing']) + expected = tp.tibble(words = ['apple', 'amazing']) assert actual.equals(expected), "str_starts failed" def test_str_to_lower(): """Can str_to_lower lowercase a string""" - df = tp.Tibble(name = ['APPLE', 'BANANA', 'PEAR', 'GRAPE']) + df = tp.tibble(name = ['APPLE', 'BANANA', 'PEAR', 'GRAPE']) actual = df.mutate(name = tp.str_to_lower(tp.col('name'))) - expected = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + expected = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) assert actual.equals(expected), "str_to_lower failed" def test_str_to_upper(): """Can str_to_upper uppercase a string""" - df = tp.Tibble(name = ['apple', 'banana', 'pear', 'grape']) + df = tp.tibble(name = ['apple', 'banana', 'pear', 'grape']) actual = df.mutate(name = tp.str_to_upper(tp.col('name'))) - expected = tp.Tibble(name = ['APPLE', 'BANANA', 'PEAR', 'GRAPE']) + expected = tp.tibble(name = ['APPLE', 'BANANA', 'PEAR', 'GRAPE']) assert actual.equals(expected), "str_to_upper failed" def test_str_trim(): """Can str_to_upper uppercase a string""" - df = tp.Tibble(x = [' a ', ' b ', ' c ']) + df = tp.tibble(x = [' a ', ' b ', ' c ']) actual = ( df.mutate(both = tp.str_trim('x'), left = tp.str_trim('x', "left"), right = tp.str_trim('x', "right")) .drop('x') ) - expected = tp.Tibble( + expected = tp.tibble( both = ['a', 'b', 'c'], left = ['a ', 'b ', 'c '], right = [' a', ' b', ' c'] diff --git a/tests/test_tibble.py b/tests/test_tibble.py index 3437ce9..0c85262 100644 --- a/tests/test_tibble.py +++ b/tests/test_tibble.py @@ -5,201 +5,201 @@ def test_arrange1(): """Can arrange 
ascending""" - df = tp.Tibble(x = ['a', 'a', 'b'], y = [2, 1, 3]) + df = tp.tibble(x = ['a', 'a', 'b'], y = [2, 1, 3]) actual = df.arrange('y') - expected = tp.Tibble(x = ['a', 'a', 'b'], y = [1, 2, 3]) + expected = tp.tibble(x = ['a', 'a', 'b'], y = [1, 2, 3]) assert actual.equals(expected), "arrange ascending failed" - assert type(actual) == tp.Tibble, "arrange didn't return a Tibble" + assert type(actual) == tp.tibble, "arrange didn't return a tibble" def test_arrange2(): """Can arrange descending""" - df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) + df = tp.tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) actual = df.arrange(tp.desc('x'), 'y') - expected = tp.Tibble({'x': ['b', 'a', 'a'], 'y': [3, 1, 2]}) + expected = tp.tibble({'x': ['b', 'a', 'a'], 'y': [3, 1, 2]}) assert actual.equals(expected), "arrange descending failed" def test_arrange_across(): """Can arrange across""" - df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [1, 2, 3], 'z': [1, 2, 3]}) + df = tp.tibble({'x': ['a', 'a', 'b'], 'y': [1, 2, 3], 'z': [1, 2, 3]}) actual = df.arrange( tp.across(['x']), tp.across(['y', 'z'], tp.desc) ) - expected = tp.Tibble(x = ['a', 'a', 'b'], y = [2, 1, 3], z = [2, 1, 3]) + expected = tp.tibble(x = ['a', 'a', 'b'], y = [2, 1, 3], z = [2, 1, 3]) assert actual.equals(expected), "arrange across failed" def test_bind_cols_single(): """Can bind_cols""" - df1 = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [1, 2, 3]}) - df2 = tp.Tibble({'z': [4, 4, 4]}) + df1 = tp.tibble({'x': ['a', 'a', 'b'], 'y': [1, 2, 3]}) + df2 = tp.tibble({'z': [4, 4, 4]}) actual = df1.bind_cols(df2) - expected = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [1, 2, 3], 'z':[4, 4, 4]}) + expected = tp.tibble({'x': ['a', 'a', 'b'], 'y': [1, 2, 3], 'z':[4, 4, 4]}) assert actual.equals(expected), "bind_cols failed" - assert type(actual) == tp.Tibble, "bind_cols didn't return a Tibble" + assert type(actual) == tp.tibble, "bind_cols didn't return a tibble" def test_bind_cols_multiple(): """Can bind_cols multiple""" - 
df1 = tp.Tibble(x = range(3)) - df2 = tp.Tibble(y = range(3)) - df3 = tp.Tibble(z = range(3)) + df1 = tp.tibble(x = range(3)) + df2 = tp.tibble(y = range(3)) + df3 = tp.tibble(z = range(3)) actual = df1.bind_cols(df2, df3) - expected = tp.Tibble(x = range(3), y = range(3), z = range(3)) + expected = tp.tibble(x = range(3), y = range(3), z = range(3)) assert actual.equals(expected), "multiple bind_cols failed" def test_bind_rows_single(): """Can bind rows""" - df1 = tp.Tibble({'x': ['a', 'a'], 'y': [2, 1]}) - df2 = tp.Tibble({'x': ['b'], 'y': [3]}) + df1 = tp.tibble({'x': ['a', 'a'], 'y': [2, 1]}) + df2 = tp.tibble({'x': ['b'], 'y': [3]}) actual = df1.bind_rows(df2) - expected = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) + expected = tp.tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) assert actual.equals(expected), "bind_rows failed" - assert type(actual) == tp.Tibble, "bind_rows didn't return a Tibble" + assert type(actual) == tp.tibble, "bind_rows didn't return a tibble" def test_bind_rows_auto_align(): """Can bind rows""" - df1 = tp.Tibble(x = ['a', 'a'], y = [2, 1]) - df2 = tp.Tibble(y = [3], x = ['b']) + df1 = tp.tibble(x = ['a', 'a'], y = [2, 1]) + df2 = tp.tibble(y = [3], x = ['b']) actual = df1.bind_rows(df2) - expected = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) + expected = tp.tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) assert actual.equals(expected), "bind_rows auto-align failed" def test_bind_rows_multiple(): """Can bind rows (multiple)""" - df1 = tp.Tibble({'x': ['a', 'a'], 'y': [2, 1]}) - df2 = tp.Tibble({'x': ['b'], 'y': [3]}) - df3 = tp.Tibble({'x': ['b'], 'y': [3]}) + df1 = tp.tibble({'x': ['a', 'a'], 'y': [2, 1]}) + df2 = tp.tibble({'x': ['b'], 'y': [3]}) + df3 = tp.tibble({'x': ['b'], 'y': [3]}) actual = df1.bind_rows(df2, df3) - expected = tp.Tibble({'x': ['a', 'a', 'b', 'b'], 'y': [2, 1, 3, 3]}) + expected = tp.tibble({'x': ['a', 'a', 'b', 'b'], 'y': [2, 1, 3, 3]}) assert actual.equals(expected), "bind_rows multiple failed" def 
test_clone(): - df = tp.Tibble(x = range(3), y = range(3)) + df = tp.tibble(x = range(3), y = range(3)) actual = df.clone() - assert type(actual) == tp.Tibble, "clone didn't return a Tibble" + assert type(actual) == tp.tibble, "clone didn't return a tibble" def test_count_no_args(): """Can count rows (no args)""" - df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [1, 1, 1]}) + df = tp.tibble({'x': ['a', 'a', 'b'], 'y': [1, 1, 1]}) actual = df.count() - expected = tp.Tibble({'n': [3]}) + expected = tp.tibble({'n': [3]}) assert actual.equals(expected), "count with no args failed" def test_count_one_arg(): """Can count rows (one arg)""" - df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [1, 1, 1]}) + df = tp.tibble({'x': ['a', 'a', 'b'], 'y': [1, 1, 1]}) actual = df.count('x', sort = True) - expected = tp.Tibble({'x': ['a', 'b'], 'n': [2, 1]}) + expected = tp.tibble({'x': ['a', 'b'], 'n': [2, 1]}) assert actual.equals(expected), "count with one arg failed" def test_distinct_empty(): """Can distinct columns""" - df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': ['a', 'a', 'b'], 'y': ['a', 'a', 'b']}) actual = df.distinct().arrange('x', 'y') - expected = tp.Tibble({'x': ['a', 'b'], 'y': ['a', 'b']}) + expected = tp.tibble({'x': ['a', 'b'], 'y': ['a', 'b']}) assert actual.equals(expected), "empty distinct failed" - assert type(actual) == tp.Tibble, "distinct didn't return a Tibble" + assert type(actual) == tp.tibble, "distinct didn't return a tibble" def test_distinct_select(): """Can distinct columns""" - df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) + df = tp.tibble({'x': ['a', 'a', 'b'], 'y': [2, 1, 3]}) actual = df.distinct('x').arrange('x') - expected = tp.Tibble({'x': ['a', 'b']}) + expected = tp.tibble({'x': ['a', 'b']}) assert actual.equals(expected), "distinct with select failed" def test_drop(): """Can drop columns""" - df = tp.Tibble(x = range(3), y = range(3)) + df = tp.tibble(x = range(3), y = range(3)) actual = df.drop('x') - 
expected = tp.Tibble(y = range(3)) + expected = tp.tibble(y = range(3)) assert actual.equals(expected), "drop failed" - assert type(actual) == tp.Tibble, "drop didn't return a Tibble" + assert type(actual) == tp.tibble, "drop didn't return a tibble" def test_drop_null_empty(): """Can drop nulls from all cols""" - df = tp.Tibble(x = [1, None, 3], y = [None, 2, 3], z = range(1, 4)) + df = tp.tibble(x = [1, None, 3], y = [None, 2, 3], z = range(1, 4)) actual = df.drop_null() - expected = tp.Tibble(x = [3], y = [3], z = [3]) + expected = tp.tibble(x = [3], y = [3], z = [3]) assert actual.equals(expected), "empty drop_null failed" - assert type(actual) == tp.Tibble, "drop_null didn't return a Tibble" + assert type(actual) == tp.tibble, "drop_null didn't return a tibble" def test_drop_null_select(): """Can drop nulls with selection""" - df = tp.Tibble(x = [1, None, 3], y = [None, 2, 3], z = range(1, 4)) + df = tp.tibble(x = [1, None, 3], y = [None, 2, 3], z = range(1, 4)) actual = df.drop_null('x') - expected = tp.Tibble(x = [1, 3], y = [None, 3], z = [1, 3]) + expected = tp.tibble(x = [1, 3], y = [None, 3], z = [1, 3]) assert actual.equals(expected, null_equal = True), "drop_null with selection failed" def test_fill(): """Can fill""" - df = tp.Tibble({'chr': ['a', None], 'int': [1, None]}) + df = tp.tibble({'chr': ['a', None], 'int': [1, None]}) actual = df.fill('chr', 'int') - expected = tp.Tibble({'chr': ['a', 'a'], 'int': [1, 1]}) + expected = tp.tibble({'chr': ['a', 'a'], 'int': [1, 1]}) assert actual.equals(expected), "fill failed" - assert type(actual) == tp.Tibble, "fill didn't return a Tibble" + assert type(actual) == tp.tibble, "fill didn't return a tibble" def test_filter(): """Can filter multiple conditions""" - df = tp.Tibble({'x': range(10), 'y': range(10)}) + df = tp.tibble({'x': range(10), 'y': range(10)}) actual = df.filter(col('x') <= 3, col('y') < 2) - expected = tp.Tibble({'x': range(2), 'y': range(2)}) + expected = tp.tibble({'x': range(2), 'y': 
range(2)}) assert actual.equals(expected), "filter failed" - assert type(actual) == tp.Tibble, "filter didn't return a Tibble" + assert type(actual) == tp.tibble, "filter didn't return a tibble" def test_filter_grouped(): - df = tp.Tibble(x = range(3), y = ['a', 'a', 'b']) + df = tp.tibble(x = range(3), y = ['a', 'a', 'b']) actual = df.filter(col('x') <= col('x').mean(), _by = 'y').arrange('y') - expected = tp.Tibble(x = [0, 2], y = ['a', 'b']) + expected = tp.tibble(x = [0, 2], y = ['a', 'b']) assert actual.equals(expected), "grouped filter failed" - assert type(actual) == tp.Tibble, "grouped filter didn't return a Tibble" + assert type(actual) == tp.tibble, "grouped filter didn't return a tibble" def test_full_join(): """Can perform a full join""" - df1 = tp.Tibble(x = ['a', 'a', 'b'], y = range(3)) - df2 = tp.Tibble(x = ['a'], z = range(1)) + df1 = tp.tibble(x = ['a', 'a', 'b'], y = range(3)) + df2 = tp.tibble(x = ['a'], z = range(1)) actual = df1.full_join(df2) - expected = tp.Tibble(x = ['a', 'a', 'b'], y = [0, 1, 2], z = [0, 0, None]) + expected = tp.tibble(x = ['a', 'a', 'b'], y = [0, 1, 2], z = [0, 0, None]) assert actual.equals(expected, null_equal = True), "full_join failed" - assert type(actual) == tp.Tibble, "full_join didn't return a Tibble" + assert type(actual) == tp.tibble, "full_join didn't return a tibble" def test_inner_join(): """Can perform a inner join""" - df1 = tp.Tibble(x = ['a', 'a', 'b'], y = range(3)) - df2 = tp.Tibble(x = ['a'], z = range(1)) + df1 = tp.tibble(x = ['a', 'a', 'b'], y = range(3)) + df2 = tp.tibble(x = ['a'], z = range(1)) actual = df1.inner_join(df2) - expected = tp.Tibble(x = ['a', 'a'], y = [0, 1], z = [0, 0]) + expected = tp.tibble(x = ['a', 'a'], y = [0, 1], z = [0, 0]) assert actual.equals(expected), "inner_join failed" - assert type(actual) == tp.Tibble, "inner_join didn't return a Tibble" + assert type(actual) == tp.tibble, "inner_join didn't return a tibble" def test_left_join(): """Can perform a left join""" - 
df1 = tp.Tibble(x = ['a', 'a', 'b'], y = range(3)) - df2 = tp.Tibble(x = ['a', 'b'], z = range(2)) + df1 = tp.tibble(x = ['a', 'a', 'b'], y = range(3)) + df2 = tp.tibble(x = ['a', 'b'], z = range(2)) actual = df1.left_join(df2) - expected = tp.Tibble(x = ['a', 'a', 'b'], y = range(3), z = [0, 0 ,1]) + expected = tp.tibble(x = ['a', 'a', 'b'], y = range(3), z = [0, 0 ,1]) assert actual.equals(expected), "left_join failed" - assert type(actual) == tp.Tibble, "left_join didn't return a Tibble" + assert type(actual) == tp.tibble, "left_join didn't return a tibble" def test_mutate(): """Can edit existing columns and can add columns""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) actual = df.mutate(double_x = col('x') * 2, y = col('y') + 10, y_plus_3 = col('y') + 3) - expected = tp.Tibble( + expected = tp.tibble( x = _repeat(1, 3), y = _repeat(12, 3), double_x = _repeat(2, 3), y_plus_3 = _repeat(15, 3) ) assert actual.equals(expected), "mutate failed" - assert type(actual) == tp.Tibble, "mutate didn't return a Tibble" + assert type(actual) == tp.tibble, "mutate didn't return a tibble" def test_mutate_across(): """Can mutate multiple columns simultaneously""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) actual = df.mutate(tp.across(tp.Int64, lambda x: x * 2), x_plus_y = col('x') + col('y')) - expected = tp.Tibble( + expected = tp.tibble( {'x': _repeat(2, 3), 'y': _repeat(4, 3), 'x_plus_y': _repeat(6, 3)} @@ -208,9 +208,9 @@ def test_mutate_across(): def test_mutate_constant(): """Can add a constant value without tp.lit""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) actual = df.mutate(z = "z") - expected = tp.Tibble( + expected = tp.tibble( x = _repeat(1, 3), y = _repeat(2, 3), z = _repeat('z', 3) @@ -219,235 +219,235 @@ def test_mutate_constant(): def 
test_names(): """Can get column names""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) assert df.names == ['x', 'y'], "names failed" def test_ncol(): """Can number of columns""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) assert df.ncol == 2, "ncol failed" def test_nrow(): """Can number of rows""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) assert df.nrow == 3, "nrow failed" def test_pivot_longer1(): "Can pivot all (unspecified) cols to long" - df = tp.Tibble({'x': [1, 2], 'y': [3, 4]}) + df = tp.tibble({'x': [1, 2], 'y': [3, 4]}) actual = df.pivot_longer() - expected = tp.Tibble({'name': ['x', 'x', 'y', 'y'], 'value': range(1, 5)}) + expected = tp.tibble({'name': ['x', 'x', 'y', 'y'], 'value': range(1, 5)}) assert actual.equals(expected), "unspecified pivot_longer failed" - assert type(actual) == tp.Tibble, "pivot_longer didn't return a Tibble" + assert type(actual) == tp.tibble, "pivot_longer didn't return a tibble" def test_pivot_longer2(): """Can pivot all (specified) cols to long""" - df = tp.Tibble({'x': [1, 2], 'y': [3, 4]}) + df = tp.tibble({'x': [1, 2], 'y': [3, 4]}) actual = df.pivot_longer(['x', 'y']) - expected = tp.Tibble({'name': ['x', 'x', 'y', 'y'], 'value': range(1, 5)}) + expected = tp.tibble({'name': ['x', 'x', 'y', 'y'], 'value': range(1, 5)}) assert actual.equals(expected), "specified pivot_longer failed" def test_pivot_wider1(): """Can pivot all cols to wide""" - df = tp.Tibble({'label': ['x', 'y', 'z'], 'val': range(1, 4)}) + df = tp.tibble({'label': ['x', 'y', 'z'], 'val': range(1, 4)}) actual = df.pivot_wider(names_from = 'label', values_from = 'val').select('x', 'y', 'z') - expected = tp.Tibble({'x': [1], 'y': [2], 'z': [3]}) + expected = tp.tibble({'x': [1], 'y': [2], 'z': [3]}) assert actual.equals(expected), "pivot_wider 
all cols failed" - assert type(actual) == tp.Tibble, "pivot_wider didn't return a Tibble" + assert type(actual) == tp.tibble, "pivot_wider didn't return a tibble" def test_pivot_wider2(): """Can pivot cols to wide with id col""" - df = tp.Tibble({'id': _repeat(1, 3), 'label': ['x', 'y', 'z'], 'val': range(1, 4)}) + df = tp.tibble({'id': _repeat(1, 3), 'label': ['x', 'y', 'z'], 'val': range(1, 4)}) actual = df.pivot_wider(names_from = 'label', values_from = 'val').select('id', 'x', 'y', 'z') - expected = tp.Tibble({'id': [1], 'x': [1], 'y': [2], 'z': [3]}) + expected = tp.tibble({'id': [1], 'x': [1], 'y': [2], 'z': [3]}) assert actual.equals(expected), "pivot_wider with id failed" def test_pivot_wider3(): """Can pivot cols to wide with values filled""" - df = tp.Tibble({'id': _repeat(1, 3), 'label': ['x', 'y', 'z'], 'val': range(1, 4)}) + df = tp.tibble({'id': _repeat(1, 3), 'label': ['x', 'y', 'z'], 'val': range(1, 4)}) actual = ( df.pivot_wider(names_from = 'label', values_from = 'id', values_fill = 0) .select('val', 'x', 'y', 'z').arrange('val') ) - expected = tp.Tibble({'val': [1, 2, 3], 'x': [1, 0, 0], 'y': [0, 1, 0], 'z': [0, 0, 1]}) + expected = tp.tibble({'val': [1, 2, 3], 'x': [1, 0, 0], 'y': [0, 1, 0], 'z': [0, 0, 1]}) assert actual.equals(expected), "pivot_wider with values filled failed" def test_pivot_wider4(): """Can pivot cols to wide with values filled - doesn't affect id col""" - df = tp.Tibble(id = [None, 2], var = ["x", "y"], val = [1, 2]) + df = tp.tibble(id = [None, 2], var = ["x", "y"], val = [1, 2]) actual = ( df.pivot_wider(names_from = "var", values_from = "val", values_fill = 0) .select('id', 'x', 'y') .arrange('y') ) - expected = tp.Tibble({'id': [None, 2], 'x': [1, 0], 'y': [0, 2]}) + expected = tp.tibble({'id': [None, 2], 'x': [1, 0], 'y': [0, 2]}) assert actual.equals(expected), "pivot_wider with values filled failed" def test_print(): """Printing doesn't alter class of df""" - df = tp.Tibble(x = range(3), y = range(3)) + df = 
tp.tibble(x = range(3), y = range(3)) repr(df) print(df) - assert isinstance(df, tp.Tibble), "Printing failed" + assert isinstance(df, tp.tibble), "Printing failed" def test_pull(): """Can use pull""" - df = tp.Tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) + df = tp.tibble({'x': _repeat(1, 3), 'y': _repeat(2, 3)}) actual = df.pull('x') expected = df.to_polars().get_column('x') assert actual.equals(expected), "pull failed" def test_relocate_before(): """Can relocate before columns""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.relocate('y', 'z', before = 'x') expected = df.select('y', 'z', 'x') assert actual.equals(expected), "relocate before failed" - assert type(actual) == tp.Tibble, "relocate didn't return a Tibble" + assert type(actual) == tp.tibble, "relocate didn't return a tibble" def test_relocate_after(): """Can relocate after columns""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.relocate('z', 'y', after = 'x') expected = df.select('x', 'z', 'y') assert actual.equals(expected), "relocate after failed" def test_relocate_empty(): """Can relocate to the beginning""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.relocate('z', 'y') expected = df.select('z', 'y', 'x') assert actual.equals(expected), "relocate to the beginning failed" def test_rename_dplyr_kwargs(): """Can rename - dplyr interface (kwargs)""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.rename(new_x = 'x', new_y = 'y') - expected = tp.Tibble({'new_x': range(3), 'new_y': range(3), 'z': range(3)}) + expected = tp.tibble({'new_x': range(3), 'new_y': range(3), 'z': range(3)}) assert actual.equals(expected), "dplyr rename 
failed" - assert type(actual) == tp.Tibble, "rename didn't return a Tibble" + assert type(actual) == tp.tibble, "rename didn't return a tibble" def test_rename_dplyr_strings(): """Can rename - dplyr interface (strings)""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.rename('new_x', 'x', 'new_y', 'y') - expected = tp.Tibble({'new_x': range(3), 'new_y': range(3), 'z': range(3)}) + expected = tp.tibble({'new_x': range(3), 'new_y': range(3), 'z': range(3)}) assert actual.equals(expected), "dplyr rename failed" def test_rename_pandas(): """Can rename - pandas interface""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.rename({'x': 'new_x', 'y': 'new_y'}) - expected = tp.Tibble({'new_x': range(3), 'new_y': range(3), 'z': range(3)}) + expected = tp.tibble({'new_x': range(3), 'new_y': range(3), 'z': range(3)}) assert actual.equals(expected), "pandas rename failed" def test_replace_null(): """Can replace nulls""" - df = tp.Tibble(x = [0, None], y = [None, None]) + df = tp.tibble(x = [0, None], y = [None, None]) actual = df.replace_null(dict(x = 1, y = 2)) - expected = tp.Tibble(x = [0, 1], y = [2, 2]) + expected = tp.tibble(x = [0, 1], y = [2, 2]) assert actual.equals(expected), "replace_null method failed" - assert type(actual) == tp.Tibble, "replace_null didn't return a Tibble" + assert type(actual) == tp.tibble, "replace_null didn't return a tibble" def test_set_names(): """Can set_names""" - df = tp.Tibble(x = range(3), y = range(3)) + df = tp.tibble(x = range(3), y = range(3)) actual = df.set_names(['a', 'b']) - expected = tp.Tibble(a = range(3), b = range(3)) + expected = tp.tibble(a = range(3), b = range(3)) assert actual.equals(expected), "set_names failed" - assert type(actual) == tp.Tibble, "set_names didn't return a Tibble" + assert type(actual) == tp.tibble, "set_names didn't 
return a tibble" def test_select(): """Can select columns""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.select('x', 'z') expected = df[['x', 'z']] assert actual.equals(expected), "select failed" - assert type(actual) == tp.Tibble, "select didn't return a Tibble" + assert type(actual) == tp.tibble, "select didn't return a tibble" def test_separate(): """Can separate""" - df = tp.Tibble(x = ['a_a', 'b_b', 'c_c']) + df = tp.tibble(x = ['a_a', 'b_b', 'c_c']) actual = df.separate('x', into = ['left', 'right']).arrange('left') - expected = tp.Tibble(left = ['a', 'b', 'c'], right = ['a', 'b', 'c']) + expected = tp.tibble(left = ['a', 'b', 'c'], right = ['a', 'b', 'c']) assert actual.equals(expected), "separate failed" - assert type(actual) == tp.Tibble, "separate didn't return a Tibble" + assert type(actual) == tp.tibble, "separate didn't return a tibble" def test_slice(): """Can slice""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.slice(0, 2) - expected = tp.Tibble({'x': [0, 2], 'y': ['a', 'b']}) + expected = tp.tibble({'x': [0, 2], 'y': ['a', 'b']}) assert actual.equals(expected), "slice failed" - assert type(actual) == tp.Tibble, "slice didn't return a Tibble" + assert type(actual) == tp.tibble, "slice didn't return a tibble" def test_slice_head(): """Can slice_head""" - df = tp.Tibble({'x': range(3), 'y': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.slice_head(2) - expected = tp.Tibble({'x': [0, 1], 'y': ['a', 'a']}) + expected = tp.tibble({'x': [0, 1], 'y': ['a', 'a']}) assert actual.equals(expected), "slice_head failed" - assert type(actual) == tp.Tibble, "slice_head didn't return a Tibble" + assert type(actual) == tp.tibble, "slice_head didn't return a tibble" def test_slice_tail(): """Can slice_tail by""" - df = tp.Tibble({'x': range(3), 'y': ['a', 
'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': ['a', 'a', 'b']}) actual = df.slice_tail(2) - expected = tp.Tibble({'x': [1, 2], 'y': ['a', 'b']}) + expected = tp.tibble({'x': [1, 2], 'y': ['a', 'b']}) assert actual.equals(expected), "slice_tail failed" - assert type(actual) == tp.Tibble, "slice_tail didn't return a Tibble" + assert type(actual) == tp.tibble, "slice_tail didn't return a tibble" def test_summarise(): """Can use summarise alias""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.summarise(avg_x = col('x').mean()) - expected = tp.Tibble({'avg_x': [1]}) + expected = tp.tibble({'avg_x': [1]}) assert actual.equals(expected), "summarise failed" def test_summarize(): """Can use summarize""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) actual = df.summarize(avg_x = col('x').mean()) - expected = tp.Tibble({'avg_x': [1]}) + expected = tp.tibble({'avg_x': [1]}) assert actual.equals(expected), "ungrouped summarize failed" - assert type(actual) == tp.Tibble, "summarize didn't return a Tibble" + assert type(actual) == tp.tibble, "summarize didn't return a tibble" def test_summarize_grouped(): """Can use summarize by group""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': ['a', 'a', 'b']}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': ['a', 'a', 'b']}) actual = df.summarize(avg_x = col('x').mean(), _by = 'z').arrange('z') - expected = tp.Tibble(z = ['a', 'b'], avg_x = [.5, 2]) + expected = tp.tibble(z = ['a', 'b'], avg_x = [.5, 2]) assert actual.equals(expected), "grouped summarize failed" def test_summarize_across(): """Can use summarize_across""" - df = tp.Tibble(x = range(3), y = range(3), z = range(3)) + df = tp.tibble(x = range(3), y = range(3), z = range(3)) actual = df.summarize(tp.across(['x', 'y'], tp.max, names_prefix = "max_"), avg_x = col('x').mean()) - expected = 
tp.Tibble({'max_x': [2], 'max_y': [2], 'avg_x': [1]}) + expected = tp.tibble({'max_x': [2], 'max_y': [2], 'avg_x': [1]}) assert actual.equals(expected), "ungrouped summarize across failed" def test_to_dict(): """Can convert to a dictionary""" - df = tp.Tibble({'x': range(3), 'y': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3)}) assert type(df.to_dict()) == dict def test_to_polars(): """Can convert to a polars DataFrame""" - df = tp.Tibble({'x': range(3), 'y': range(3), 'z': range(3)}) + df = tp.tibble({'x': range(3), 'y': range(3), 'z': range(3)}) assert isinstance(df.to_polars(), pl.DataFrame), "to_polars failed" def test_unite(): """Can unite columns""" - df = tp.Tibble(a = ["a", "a", "a"], b = ["b", "b", "b"], c = range(3)) + df = tp.tibble(a = ["a", "a", "a"], b = ["b", "b", "b"], c = range(3)) actual = df.unite("new_col", ["a", "b"]) - expected = tp.Tibble(new_col = ["a_b"] * 3, c = range(3)) + expected = tp.tibble(new_col = ["a_b"] * 3, c = range(3)) assert actual.equals(expected), "unite failed" - assert type(actual) == tp.Tibble, "unite didn't return a Tibble" + assert type(actual) == tp.tibble, "unite didn't return a tibble" def test_funs_in_a_row(): """Tests if shallow copy is working properly""" - df = tp.Tibble(x = range(3), y = range(3), z = range(3)) + df = tp.tibble(x = range(3), y = range(3), z = range(3)) df.distinct() df.drop('x') df.drop_null() diff --git a/tests/test_tidyselect.py b/tests/test_tidyselect.py index 1870fba..0d6176a 100644 --- a/tests/test_tidyselect.py +++ b/tests/test_tidyselect.py @@ -2,56 +2,56 @@ def test_contains_ignore_case(): """Can find columns that contain and ignores case""" - df = tp.Tibble({'name': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) + df = tp.tibble({'name': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) actual = df.select(tp.contains('M', True)) - expected = tp.Tibble({'name': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) + expected = tp.tibble({'name': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) assert 
actual.equals(expected), "contains ignore case failed" def test_contains_include_case(): """Can find columns that contain and includes case""" - df = tp.Tibble({'name': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) + df = tp.tibble({'name': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) actual = df.select(tp.contains('M', ignore_case=False)) - expected = tp.Tibble({'NUMBER': [2, 1, 1]}) + expected = tp.tibble({'NUMBER': [2, 1, 1]}) assert actual.equals(expected), "contains includes case failed" def test_ends_with_ignore_case(): """Can find columns that ends_with and ignores case""" - df = tp.Tibble({'writer': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) + df = tp.tibble({'writer': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) actual = df.select(tp.ends_with('er', True)) - expected = tp.Tibble({'writer': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) + expected = tp.tibble({'writer': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) assert actual.equals(expected), "ends_with ignore case failed" def test_ends_with_include_case(): """Can find columns that ends_with and ignores case""" - df = tp.Tibble({'writer': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) + df = tp.tibble({'writer': ['a', 'a', 'b'], 'NUMBER': [2, 1, 1]}) actual = df.select(tp.ends_with('er', ignore_case=False)) - expected = tp.Tibble({'writer': ['a', 'a', 'b']}) + expected = tp.tibble({'writer': ['a', 'a', 'b']}) assert actual.equals(expected), "ends_with ignore case failed" def test_everything(): """Can find all columns""" - df = tp.Tibble({'name': ['a', 'a', 'b'], 'value': [2, 1, 1]}) + df = tp.tibble({'name': ['a', 'a', 'b'], 'value': [2, 1, 1]}) actual = df.select(tp.everything()) - expected = tp.Tibble({'name': ['a', 'a', 'b'], 'value': [2, 1, 1]}) + expected = tp.tibble({'name': ['a', 'a', 'b'], 'value': [2, 1, 1]}) assert actual.equals(expected), "everything failed" def test_starts_with_ignore_case(): """Can find columns that starts_with and ignores case""" - df = tp.Tibble({'name': ['a', 'a', 'b'], 'Number': [2, 1, 1]}) + df = 
tp.tibble({'name': ['a', 'a', 'b'], 'Number': [2, 1, 1]}) actual = df.select(tp.starts_with('n', True)) - expected = tp.Tibble({'name': ['a', 'a', 'b'], 'Number': [2, 1, 1]}) + expected = tp.tibble({'name': ['a', 'a', 'b'], 'Number': [2, 1, 1]}) assert actual.equals(expected), "starts_with ignore case failed" def test_starts_with_include_case(): """Can find columns that starts_with and includes case""" - df = tp.Tibble({'name': ['a', 'a', 'b'], 'Number': [2, 1, 1]}) + df = tp.tibble({'name': ['a', 'a', 'b'], 'Number': [2, 1, 1]}) actual = df.select(tp.starts_with('n', ignore_case=False)) - expected = tp.Tibble({'name': ['a', 'a', 'b']}) + expected = tp.tibble({'name': ['a', 'a', 'b']}) assert actual.equals(expected), "starts_with include case failed" def test_where(): """Can use where""" - df = tp.Tibble({ + df = tp.tibble({ "string_col": ["a"], "numeric_col": [1] }) diff --git a/tidypolars/__init__.py b/tidypolars/__init__.py index 2aad14c..761f874 100644 --- a/tidypolars/__init__.py +++ b/tidypolars/__init__.py @@ -9,7 +9,7 @@ from .lubridate import * from .reexports import * from .stringr import * -from .tibble import * +from .tibble_df import * from .tidyselect import * __all__ = ( @@ -17,6 +17,6 @@ lubridate.__all__ + reexports.__all__ + stringr.__all__ + - tibble.__all__ + + tibble_df.__all__ + tidyselect.__all__ ) \ No newline at end of file diff --git a/tidypolars/funs.py b/tidypolars/funs.py index 0f8147b..80a912f 100644 --- a/tidypolars/funs.py +++ b/tidypolars/funs.py @@ -1,5 +1,5 @@ import polars as pl -from .tibble import from_polars, Tibble +from .tibble_df import from_polars, tibble from .utils import ( _as_list, _col_expr, @@ -56,7 +56,7 @@ def across(cols, fn = lambda x: x, names_prefix = None): Examples -------- - >>> df = tp.Tibble(x = ['a', 'a', 'b'], y = range(3), z = range(3)) + >>> df = tp.tibble(x = ['a', 'a', 'b'], y = range(3), z = range(3)) >>> df.mutate(across(['y', 'z'], lambda x: x * 2)) >>> df.mutate(across(tp.Int64, lambda x: x * 2, 
names_prefix = "double_")) >>> df.summarize(across(['y', 'z'], tp.mean), by = 'x') @@ -163,7 +163,7 @@ def between(x, left, right): Examples -------- - >>> df = tp.Tibble(x = range(4)) + >>> df = tp.tibble(x = range(4)) >>> df.filter(tp.between(col('x'), 1, 3)) """ x = _col_expr(x) @@ -180,7 +180,7 @@ def case_when(expr): Examples -------- - >>> df = tp.Tibble(x = range(1, 4)) + >>> df = tp.tibble(x = range(1, 4)) >>> df.mutate( >>> case_x = tp.case_when(col('x') < 2).then(1) >>> .when(col('x') < 3).then(2) @@ -330,7 +330,7 @@ def if_else(condition, true, false): Examples -------- - >>> df = tp.Tibble(x = range(1, 4)) + >>> df = tp.tibble(x = range(1, 4)) >>> df.mutate(if_x = tp.if_else(col('x') < 2, 1, 2)) """ return pl.when(condition).then(true).otherwise(false) @@ -346,7 +346,7 @@ def is_finite(x): Examples -------- - >>> df = tp.Tibble(x = [1.0, float('inf')]) + >>> df = tp.tibble(x = [1.0, float('inf')]) >>> df.filter(tp.is_finite(col('x'))) """ x = _col_expr(x) @@ -365,7 +365,7 @@ def is_in(x, y): Examples -------- - >>> df = tp.Tibble(x = range(3)) + >>> df = tp.tibble(x = range(3)) >>> df.filter(tp.is_in(col('x'), [1, 2])) """ x = _col_expr(x) @@ -382,7 +382,7 @@ def is_infinite(x): Examples -------- - >>> df = tp.Tibble(x = [1.0, float('inf')]) + >>> df = tp.tibble(x = [1.0, float('inf')]) >>> df.filter(tp.is_infinite(col('x'))) """ x = _col_expr(x) @@ -399,7 +399,7 @@ def is_not(x): Examples -------- - >>> df = tp.Tibble(x = range(3)) + >>> df = tp.tibble(x = range(3)) >>> df.filter(tp.not_(col('x') < 2)) """ x = _col_expr(x) @@ -416,7 +416,7 @@ def is_nan(x): Examples -------- - >>> df = tp.Tibble(x = range(3)) + >>> df = tp.tibble(x = range(3)) >>> df.filter(tp.is_nan(col('x'))) """ x = _col_expr(x) @@ -435,7 +435,7 @@ def is_not_in(x, y): Examples -------- - >>> df = tp.Tibble(x = range(3)) + >>> df = tp.tibble(x = range(3)) >>> df.filter(tp.is_not_in(col('x'), [1, 2])) """ x = _col_expr(x) @@ -452,7 +452,7 @@ def is_not_null(x): Examples -------- - 
>>> df = tp.Tibble(x = range(3)) + >>> df = tp.tibble(x = range(3)) >>> df.filter(tp.is_not_in(col('x'), [1, 2])) """ x = _col_expr(x) @@ -469,7 +469,7 @@ def is_null(x): Examples -------- - >>> df = tp.Tibble(x = range(3)) + >>> df = tp.tibble(x = range(3)) >>> df.filter(tp.is_not_in(col('x'), [1, 2])) """ x = _col_expr(x) @@ -734,7 +734,7 @@ def rep(x, times = 1): out = x.to_list() elif _is_list(x): out = x - elif isinstance(x, Tibble): + elif isinstance(x, tibble): out = pl.concat([x for i in range(times)]).pipe(from_polars) elif _is_iterable(x): out = list(x) @@ -755,7 +755,7 @@ def replace_null(x, replace = None): Examples -------- - >>> df = tp.Tibble(x = [0, None], y = [None, None]) + >>> df = tp.tibble(x = [0, None], y = [None, None]) >>> df.mutate(x = tp.replace_null(col('x'), 1)) """ if replace == None: return x diff --git a/tidypolars/lubridate.py b/tidypolars/lubridate.py index dd3c916..4d08b50 100644 --- a/tidypolars/lubridate.py +++ b/tidypolars/lubridate.py @@ -32,7 +32,7 @@ def as_date(x, format = None): Examples -------- - >>> df = tp.Tibble(x = ['2021-01-01', '2021-10-01']) + >>> df = tp.tibble(x = ['2021-01-01', '2021-10-01']) >>> df.mutate(date_x = tp.as_date(col('x'))) """ x = _col_expr(x) @@ -51,7 +51,7 @@ def as_datetime(x, format = None): Examples -------- - >>> df = tp.Tibble(x = ['2021-01-01', '2021-10-01']) + >>> df = tp.tibble(x = ['2021-01-01', '2021-10-01']) >>> df.mutate(date_x = tp.as_datetime(col('x'))) """ x = _col_expr(x) diff --git a/tidypolars/tibble.py b/tidypolars/tibble_df.py similarity index 92% rename from tidypolars/tibble.py rename to tidypolars/tibble_df.py index cbc9212..1572506 100644 --- a/tidypolars/tibble.py +++ b/tidypolars/tibble_df.py @@ -15,12 +15,12 @@ from operator import not_ __all__ = [ - "Tibble", + "tibble", "desc", "from_pandas", "from_polars" ] -class Tibble(pl.DataFrame): +class tibble(pl.DataFrame): """ A data frame object that provides methods familiar to R tidyverse users. 
""" @@ -90,7 +90,7 @@ def arrange(self, *args): Examples -------- - >>> df = tp.Tibble({'x': ['a', 'a', 'b'], 'y': range(3)}) + >>> df = tp.tibble({'x': ['a', 'a', 'b'], 'y': range(3)}) >>> # Arrange in ascending order >>> df.arrange('x', 'y') ... @@ -107,13 +107,13 @@ def bind_cols(self, *args): Parameters ---------- - df : Tibble + df : tibble Data frame to bind Examples -------- - >>> df1 = tp.Tibble({'x': ['a', 'a', 'b'], 'y': range(3)}) - >>> df2 = tp.Tibble({'a': ['c', 'c', 'c'], 'b': range(4, 7)}) + >>> df1 = tp.tibble({'x': ['a', 'a', 'b'], 'y': range(3)}) + >>> df2 = tp.tibble({'a': ['c', 'c', 'c'], 'b': range(4, 7)}) >>> df1.bind_cols(df2) """ frames = _as_list(args) @@ -128,13 +128,13 @@ def bind_rows(self, *args): Parameters ---------- - *args : Tibble, list + *args : tibble, list Data frames to bind by row Examples -------- - >>> df1 = tp.Tibble({'x': ['a', 'a', 'b'], 'y': range(3)}) - >>> df2 = tp.Tibble({'x': ['c', 'c', 'c'], 'y': range(4, 7)}) + >>> df1 = tp.tibble({'x': ['a', 'a', 'b'], 'y': range(3)}) + >>> df2 = tp.tibble({'x': ['c', 'c', 'c'], 'y': range(4, 7)}) >>> df1.bind_rows(df2) """ frames = _as_list(args) @@ -161,7 +161,7 @@ def count(self, *args, sort = False, name = 'n'): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': ['a', 'a', 'b']}) >>> df.count() >>> df.count('b') """ @@ -185,7 +185,7 @@ def distinct(self, *args): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': ['a', 'a', 'b']}) >>> df.distinct() >>> df.distinct('b') """ @@ -224,7 +224,7 @@ def drop_null(self, *args): Examples -------- - >>> df = tp.Tibble(x = [1, None, 3], y = [None, 'b', 'c'], z = range(3)} + >>> df = tp.tibble(x = [1, None, 3], y = [None, 'b', 'c'], z = range(3)} >>> df.drop_null() >>> df.drop_null('x', 'y') """ @@ -236,7 +236,7 @@ def drop_null(self, *args): return out.pipe(from_polars) def equals(self, other, 
null_equal = True): - """Check if two Tibbles are equal""" + """Check if two tibbles are equal""" df = self.to_polars() other = other.to_polars() return df.equals(other, null_equal = null_equal) @@ -260,7 +260,7 @@ def fill(self, *args, direction = 'down', _by = None): Examples -------- - >>> df = tp.Tibble({'a': [1, None, 3, 4, 5], + >>> df = tp.tibble({'a': [1, None, 3, 4, 5], ... 'b': [None, 2, None, None, 5], ... 'groups': ['a', 'a', 'a', 'b', 'b']}) >>> df.fill('a', 'b') @@ -304,7 +304,7 @@ def filter(self, *args, Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': ['a', 'a', 'b']}) >>> df.filter(col('a') < 2, col('b') == 'a') >>> df.filter((col('a') < 2) & (col('b') == 'a')) >>> df.filter(col('a') <= tp.mean(col('a')), by = 'b') @@ -325,7 +325,7 @@ def inner_join(self, df, left_on = None, right_on = None, on = None, suffix = '_ Parameters ---------- - df : Tibble + df : tibble Lazy DataFrame to join with. left_on : str, list Join column(s) of the left DataFrame. @@ -352,7 +352,7 @@ def left_join(self, df, left_on = None, right_on = None, on = None, suffix = '_r Parameters ---------- - df : Tibble + df : tibble Lazy DataFrame to join with. left_on : str, list Join column(s) of the left DataFrame. @@ -390,7 +390,7 @@ def mutate(self, *args, Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), c = ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), c = ['a', 'a', 'b']}) >>> df.mutate(double_a = col('a') * 2, ... a_plus_b = col('a') + col('b')) >>> df.mutate(row_num = row_number(), by = 'c') @@ -445,7 +445,7 @@ def full_join(self, df, left_on = None, right_on = None, on = None, suffix: str Parameters ---------- - df : Tibble + df : tibble Lazy DataFrame to join with. left_on : str, list Join column(s) of the left DataFrame. 
@@ -485,7 +485,7 @@ def pivot_longer(self, Examples -------- - >>> df = tp.Tibble({'id': ['id1', 'id2'], 'a': [1, 2], 'b': [1, 2]}) + >>> df = tp.tibble({'id': ['id1', 'id2'], 'a': [1, 2], 'b': [1, 2]}) >>> df.pivot_longer(cols = ['a', 'b']) >>> df.pivot_longer(cols = ['a', 'b'], names_to = 'stuff', values_to = 'things') """ @@ -523,7 +523,7 @@ def pivot_wider(self, Examples -------- - >>> df = tp.Tibble({'id': [1, 1], 'variable': ['a', 'b'], 'value': [1, 2]}) + >>> df = tp.tibble({'id': [1, 1], 'variable': ['a', 'b'], 'value': [1, 2]}) >>> df.pivot_wider(names_from = 'variable', values_from = 'value') """ if id_cols == None: @@ -564,7 +564,7 @@ def pull(self, var = None): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3)) + >>> df = tp.tibble({'a': range(3), 'b': range(3)) >>> df.pull('a') """ if var == None: @@ -583,7 +583,7 @@ def relocate(self, *args, before = None, after = None): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.relocate('a', before = 'c') >>> df.relocate('b', after = 'c') """ @@ -633,7 +633,7 @@ def rename(self, *args, **kwargs): Examples -------- - >>> df = tp.Tibble({'x': range(3), 't': range(3), 'z': ['a', 'a', 'b']}) + >>> df = tp.tibble({'x': range(3), 't': range(3), 'z': ['a', 'a', 'b']}) >>> df.rename(new_x = 'x') # dplyr interface >>> df.rename({'x': 'new_x'}) # pandas interface """ @@ -665,7 +665,7 @@ def replace_null(self, replace = None): Examples -------- - >>> df = tp.Tibble(x = [0, None], y = [None, None]) + >>> df = tp.tibble(x = [0, None], y = [None, None]) >>> df.replace_null(dict(x = 1, y = 2)) """ if replace == None: return self @@ -691,7 +691,7 @@ def separate(self, sep_col, into, sep = '_', remove = True): Examples -------- - >>> df = tp.Tibble(x = ['a_a', 'b_b', 'c_c']) + >>> df = tp.tibble(x = ['a_a', 'b_b', 'c_c']) >>> df.separate('x', into = ['left', 'right']) """ into_len = 
len(into) - 1 @@ -722,7 +722,7 @@ def set_names(self, nm = None): Examples -------- - >>> df = tp.Tibble(x = range(3), y = range(3)) + >>> df = tp.tibble(x = range(3), y = range(3)) >>> df.set_names(['a', 'b']) """ if nm == None: nm = self.names @@ -741,7 +741,7 @@ def select(self, *args): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.select('a', 'b') >>> df.select(col('a'), col('b')) """ @@ -762,15 +762,15 @@ def slice(self, *args, _by = None): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.slice(0, 1) >>> df.slice(0, by = 'c') """ rows = _as_list(args) if _uses_by(_by): - df = super(Tibble, self).group_by(_by).map_groups(lambda x: x.select(pl.all().gather(rows))) + df = super(tibble, self).group_by(_by).map_groups(lambda x: x.select(pl.all().gather(rows))) else: - df = super(Tibble, self).select(pl.all().gather(rows)) + df = super(tibble, self).select(pl.all().gather(rows)) return df.pipe(from_polars) def slice_head(self, n = 5, *, _by = None): @@ -786,15 +786,15 @@ def slice_head(self, n = 5, *, _by = None): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.slice_head(2) >>> df.slice_head(1, by = 'c') """ col_order = self.names if _uses_by(_by): - df = super(Tibble, self).group_by(_by).head(n) + df = super(tibble, self).group_by(_by).head(n) else: - df = super(Tibble, self).head(n) + df = super(tibble, self).head(n) df = df.select(col_order) return df.pipe(from_polars) @@ -811,15 +811,15 @@ def slice_tail(self, n = 5, *, _by = None): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 
'a', 'b']}) >>> df.slice_tail(2) >>> df.slice_tail(1, by = 'c') """ col_order = self.names if _uses_by(_by): - df = super(Tibble, self).group_by(_by).tail(n) + df = super(tibble, self).group_by(_by).tail(n) else: - df = super(Tibble, self).tail(n) + df = super(tibble, self).tail(n) df = df.select(col_order) return df.pipe(from_polars) @@ -846,7 +846,7 @@ def summarize(self, *args, Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.summarize(avg_a = tp.mean(col('a'))) >>> df.summarize(avg_a = tp.mean(col('a')), ... by = 'c') @@ -855,9 +855,9 @@ def summarize(self, *args, """ exprs = _as_list(args) + _kwargs_as_exprs(kwargs) if _uses_by(_by): - out = super(Tibble, self).group_by(_by).agg(exprs) + out = super(tibble, self).group_by(_by).agg(exprs) else: - out = super(Tibble, self).select(exprs) + out = super(tibble, self).select(exprs) return out.pipe(from_polars) def tail(self, n = 5, *, _by = None): @@ -920,7 +920,7 @@ def unite(self, col = "_united", unite_cols = [], sep = "_", remove = True): Examples -------- - >>> df = tp.Tibble(a = ["a", "a", "a"], b = ["b", "b", "b"], c = range(3)) + >>> df = tp.tibble(a = ["a", "a", "a"], b = ["b", "b", "b"], c = range(3)) >>> df.unite("united_col", unite_cols = ["a", "b"]) """ if len(unite_cols) == 0: @@ -961,29 +961,29 @@ class DescCol(pl.Expr): def from_polars(df): """ - Convert from polars DataFrame to Tibble + Convert from polars DataFrame to tibble Parameters ---------- df : DataFrame - pl.DataFrame to convert to a Tibble + pl.DataFrame to convert to a tibble Examples -------- >>> tp.from_polars(df) """ df = copy.copy(df) - df.__class__ = Tibble + df.__class__ = tibble return df def from_pandas(df): """ - Convert from pandas DataFrame to Tibble + Convert from pandas DataFrame to tibble Parameters ---------- df : DataFrame - pd.DataFrame to convert to a Tibble + pd.DataFrame to convert to a tibble 
Examples -------- diff --git a/tidypolars/tidyselect.py b/tidypolars/tidyselect.py index 464a552..11a890f 100644 --- a/tidypolars/tidyselect.py +++ b/tidypolars/tidyselect.py @@ -17,7 +17,7 @@ def contains(match, ignore_case = True): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.select(contains('c')) """ if ignore_case == True: @@ -40,7 +40,7 @@ def ends_with(match, ignore_case = True): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b_code': range(3), 'c_code': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b_code': range(3), 'c_code': ['a', 'a', 'b']}) >>> df.select(ends_with('code')) """ if ignore_case == True: @@ -55,7 +55,7 @@ def everything(): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'b': range(3), 'c': ['a', 'a', 'b']}) >>> df.select(everything()) """ return cs.all() @@ -73,7 +73,7 @@ def starts_with(match, ignore_case = True): Examples -------- - >>> df = tp.Tibble({'a': range(3), 'add': range(3), 'sub': ['a', 'a', 'b']}) + >>> df = tp.tibble({'a': range(3), 'add': range(3), 'sub': ['a', 'a', 'b']}) >>> df.select(starts_with('a')) """ if ignore_case == True: