From 9a758df93e2aa4fb37e423fdd42f13671ae1c319 Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora"
Date: Fri, 26 Jul 2024 14:23:59 -0500
Subject: [PATCH] generalize the astype operation to be version agnostic (#374)

---
 tests/unit/io/test_io.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/unit/io/test_io.py b/tests/unit/io/test_io.py
index a7ee8d15d..f459662d7 100644
--- a/tests/unit/io/test_io.py
+++ b/tests/unit/io/test_io.py
@@ -579,6 +579,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Make sure the directory structure is hive-like
     df_expect = ddf.compute()
     df_expect = df_expect.sort_values(["id", "x", "y"]).reset_index(drop=True)
+    ts_dtype = df_expect["timestamp"].dtype
     timestamp_check = df_expect["timestamp"].iloc[0]
     name_check = df_expect["name"].iloc[0]
     result_paths = glob.glob(
@@ -596,7 +597,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Read back with dask.dataframe and check the data
     df_check = dd.read_parquet(path, engine="pyarrow").compute()
     df_check["name"] = df_check["name"].astype("object")
-    df_check["timestamp"] = df_check["timestamp"].astype("int64")
+    df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
     df_check = df_check.sort_values(["id", "x", "y"]).reset_index(drop=True)
     for col in df_expect:
         # Order of columns can change after round-trip partitioning
@@ -605,7 +606,7 @@ def test_hive_partitioned_data(tmpdir, cpu):
     # Read back with NVT and check the data
     df_check = merlin.io.Dataset(path, engine="parquet").to_ddf().compute()
     df_check["name"] = df_check["name"].astype("object")
-    df_check["timestamp"] = df_check["timestamp"].astype("int64")
+    df_check["timestamp"] = df_check["timestamp"].astype(ts_dtype)
     df_check = df_check.sort_values(["id", "x", "y"]).reset_index(drop=True)
     for col in df_expect:
         # Order of columns can change after round-trip partitioning