From 361d6bc0fdb86a5076ec517994ed997440051bbe Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 14:10:44 +0900 Subject: [PATCH 01/12] support cast from list-like to string --- .../compute/kernels/scalar_cast_string.cc | 85 +++++++++++++++++++ cpp/src/arrow/scalar_test.cc | 16 ++-- 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index dc3fe29a3dfae..f83e658fe9404 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -510,6 +510,90 @@ void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { AddBinaryToFixedSizeBinaryCast(func); } +template +struct ListLikeToStringCastFunctor { + using BuilderType = typename TypeTraits::BuilderType; + + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& input = batch[0].array; + + BuilderType builder(ctx->memory_pool()); + RETURN_NOT_OK(builder.Reserve(input.length)); + + std::string type_info = input.type->ToString(true); + const ArraySpan& values = input.child_data[0]; + const auto* offsets = input.GetValues(1); + + int list_size = -1; + if (input.type->id() == Type::FIXED_SIZE_LIST) { + list_size = checked_cast(*input.type).list_size(); + } + + for (int64_t i = 0; i < input.length; ++i) { + if (!input.IsValid(i)) { + RETURN_NOT_OK(builder.Append("null")); + continue; + } + + std::ostringstream ss; + ss << type_info << "["; + + int64_t start, end; + if (input.type->id() == Type::FIXED_SIZE_LIST) { + start = i * list_size; + end = start + list_size; + } else { + start = offsets[i]; + end = offsets[i + 1]; + } + + for (int64_t j = start; j < end; ++j) { + if (j != start) { + ss << ", "; + } + if (values.IsValid(j)) { + ss << std::to_string(values.GetValues(1)[j]); + } else { + ss << "null"; + } + } + ss << "]"; + RETURN_NOT_OK(builder.Append(ss.str())); + } + + std::shared_ptr output_array; + RETURN_NOT_OK(builder.Finish(&output_array)); + out->value = output_array->data(); + return Status::OK(); + } +}; + +template +void AddListLikeToStringCasts(CastFunction* func) { + auto out_ty = TypeTraits::type_singleton(); + + DCHECK_OK(func->AddKernel(Type::LIST, {InputType(Type::LIST)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::LARGE_LIST, {InputType(Type::LARGE_LIST)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::LIST_VIEW, {InputType(Type::LIST_VIEW)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::LARGE_LIST_VIEW, {InputType(Type::LARGE_LIST_VIEW)}, + out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::FIXED_SIZE_LIST, {InputType(Type::FIXED_SIZE_LIST)}, + out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::MAP, {InputType(Type::MAP)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); +} + } // namespace std::vector> GetBinaryLikeCasts() { @@ -528,6 +612,7 @@ std::vector> GetBinaryLikeCasts() { AddDecimalToStringCasts(cast_string.get()); AddTemporalToStringCasts(cast_string.get()); AddBinaryToBinaryCast(cast_string.get()); + AddListLikeToStringCasts(cast_string.get()); auto cast_large_string = std::make_shared("cast_large_string", Type::LARGE_STRING); diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index 104a5697b5727..ce271fdcf19d5 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -1292,11 +1292,9 @@ class TestListLikeScalar : public ::testing::Test { auto invalid_cast_type = fixed_size_list(value_->type(), 5); CheckListCastError(scalar, invalid_cast_type); - // Cast() function doesn't support casting list-like to string, use Scalar::CastTo() - // instead. - ASSERT_OK_AND_ASSIGN(auto casted_str, scalar.CastTo(utf8())); - ASSERT_EQ(casted_str->type->id(), utf8()->id()); - ASSERT_EQ(casted_str->ToString(), scalar.ToString()); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, utf8())); + ASSERT_EQ(casted_str.scalar()->type->id(), utf8()->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), scalar.ToString()); } protected: @@ -1337,11 +1335,9 @@ TEST(TestFixedSizeListScalar, Cast) { auto invalid_cast_type = fixed_size_list(int16(), 4); CheckListCastError(scalar, invalid_cast_type); - // Cast() function doesn't support casting list-like to string, use Scalar::CastTo() - // instead. - ASSERT_OK_AND_ASSIGN(auto casted_str, scalar.CastTo(utf8())); - ASSERT_EQ(casted_str->type->id(), utf8()->id()); - ASSERT_EQ(casted_str->ToString(), scalar.ToString()); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, utf8())); + ASSERT_EQ(casted_str.scalar()->type->id(), utf8()->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), scalar.ToString()); } TEST(TestMapScalar, Basics) { From 2e72e87e2d5a37551aa79a62b599dc061b2b413a Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 16:30:14 +0900 Subject: [PATCH 02/12] add test for FSL, List and Map to String --- .../compute/kernels/scalar_cast_string.cc | 12 +- .../arrow/compute/kernels/scalar_cast_test.cc | 119 +++++++++++++++++- 2 files changed, 123 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index f83e658fe9404..3308b7c8622ef 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -547,16 +547,24 @@ struct ListLikeToStringCastFunctor { end = offsets[i + 1]; } - for (int64_t j = start; j < end; ++j) { + auto append_value = [&](int64_t j) -> Status { if (j != start) { ss << ", "; } if (values.IsValid(j)) { - ss << std::to_string(values.GetValues(1)[j]); + std::shared_ptr value_scalar; + RETURN_NOT_OK(values.ToArray()->GetScalar(j).Value(&value_scalar)); + ss << value_scalar->ToString(); } else { ss << "null"; } + return Status::OK(); + }; + + for (int64_t j = start; j < end; ++j) { + RETURN_NOT_OK(append_value(j)); } + ss << "]"; RETURN_NOT_OK(builder.Append(ss.str())); } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a6d7f6097b59b..7475609f9be10 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2275,10 +2275,6 @@ TEST(Cast, BooleanToString) { TEST(Cast, ListToPrimitive) { ASSERT_RAISES(NotImplemented, Cast(*ArrayFromJSON(list(int8()), "[[1, 2], [3, 4]]"), uint8())); - - ASSERT_RAISES( - NotImplemented, - Cast(*ArrayFromJSON(list(binary()), R"([["1", "2"], ["3", "4"]])"), utf8())); } using make_list_t = std::shared_ptr(const std::shared_ptr&); @@ -2429,6 +2425,40 @@ TEST(Cast, FSLToList) { CheckCast(fsl_int32, ArrayFromJSON(fixed_size_list(int16(), 1), "[[32689]]"), options); } +TEST(Cast, FSLToString) { + auto CheckFSLToStringCast = [](const std::shared_ptr& fsl_type, + const std::string& fsl_json, + const std::string& expected_str) { + std::shared_ptr src = ArrayFromJSON(fsl_type, fsl_json); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, utf8())); + + std::shared_ptr expected_array = ArrayFromJSON(utf8(), expected_str); + ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); + }; + + // Example with int32 list + std::shared_ptr fsl_type = fixed_size_list(int32(), 3); + const std::string fsl_json = R"([[1, 2, 3], [4, 5, 6], [7, null, 8], null])"; + const std::string expected_str = + "[\"fixed_size_list[3][1, 2, 3]\", " + "\"fixed_size_list[3][4, 5, 6]\", " + "\"fixed_size_list[3][7, null, 8]\", " + "\"null\"]"; + + CheckFSLToStringCast(fsl_type, fsl_json, expected_str); + + // Example with nested fixed_size_list of size 2 + fsl_type = fixed_size_list(fixed_size_list(int32(), 2), 2); + const std::string nested_fsl_json = R"([[[1, 2], [3, 4]], [[null, 5], null]])"; + const std::string expected_nested_str = + "[\"fixed_size_list[2]>[2]" + "[fixed_size_list[2][1, 2], fixed_size_list[2][3, 4]]\", " + "\"fixed_size_list[2]>[2]" + "[fixed_size_list[2][null, 5], null]\"]"; + + CheckFSLToStringCast(fsl_type, nested_fsl_json, expected_nested_str); +} + TEST(Cast, ListToFSL) { CheckCastList(list(int16()), fixed_size_list(int16(), 2), "[[0, 1], [2, 3], null, [null, 5], null]"); @@ -2476,6 +2506,40 @@ TEST(Cast, ListToFSL) { CastOptions::Safe(fixed_size_list(int32(), 3)))); } +TEST(Cast, ListToString) { + auto CheckListToStringCast = [](const std::shared_ptr& list_type, + const std::string& list_json, + const std::string& expected_str) { + std::shared_ptr src = ArrayFromJSON(list_type, list_json); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, utf8())); + + std::shared_ptr expected_array = ArrayFromJSON(utf8(), expected_str); + ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); + }; + + // Example with int32 list + std::shared_ptr list_type = list(int32()); + const std::string list_json = R"([[1, 2, 3], [4, 5], [6], []])"; + const std::string expected_str = + R"(["list[1, 2, 3]", + "list[4, 5]", + "list[6]", + "list[]"])"; + + CheckListToStringCast(list_type, list_json, expected_str); + + // Example with nested list of int32 + list_type = list(list(int32())); + const std::string nested_list_json = R"([[[1, 2], [3, 4]], [[5], [6, 7]], [[]], []])"; + const std::string expected_nested_str = + R"(["list>[list[1, 2], list[3, 4]]", + "list>[list[5], list[6, 7]]", + "list>[list[]]", + "list>[]"])"; + + CheckListToStringCast(list_type, nested_list_json, expected_nested_str); +} + TEST(Cast, CastMap) { const std::string map_json = "[[[\"x\", 1], [\"y\", 8], [\"z\", 9]], [[\"x\", 6]], [[\"y\", 36]]]"; @@ -2509,9 +2573,9 @@ TEST(Cast, CastMap) { std::shared_ptr dst_type = map(utf8(), field("y", list(field("b", int64())))); std::shared_ptr src = - ArrayFromJSON(src_type, "[[[\"1\", [1,2,3]]], [[\"2\", [4,5,6]]]]"); + ArrayFromJSON(src_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); std::shared_ptr dst = - ArrayFromJSON(dst_type, "[[[\"1\", [1,2,3]]], [[\"2\", [4,5,6]]]]"); + ArrayFromJSON(dst_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); CheckCast(src, dst); @@ -2524,6 +2588,49 @@ TEST(Cast, CastMap) { Cast(src, dst_type)); } +void CheckMapToStringCast(const std::string& map_json, + const std::string& map_json_nullable, + const std::vector& expected_str, + const std::vector& expected_str_nullable, + const std::shared_ptr& src_type) { + auto check_cast = [&](const std::string& json, + const std::vector& expected) { + std::shared_ptr src = ArrayFromJSON(src_type, json); + for (int64_t i = 0; i < src->length(); ++i) { + ASSERT_OK_AND_ASSIGN(auto scalar, src->GetScalar(i)); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, utf8())); + ASSERT_EQ(casted_str.scalar()->type->id(), utf8()->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), expected[i]); + } + }; + + check_cast(map_json, expected_str); + check_cast(map_json_nullable, expected_str_nullable); +} + +TEST(Cast, MapToString) { + const std::string map_json = + "[[[\"x\", 1], [\"y\", 8], [\"z\", 9]], [[\"x\", 6]], [[\"y\", 36]]]"; + const std::string map_json_nullable = + "[[[\"x\", 1], [\"y\", null], [\"z\", 9]], null, [[\"y\", 36]]]"; + + const std::vector expected_str = { + "map[{x:string = x, y:int64 = 1}, " + "{x:string = y, y:int64 = 8}, {x:string = z, y:int64 = 9}]", + "map[{x:string = x, y:int64 = 6}]", + "map[{x:string = y, y:int64 = 36}]"}; + + const std::vector expected_str_nullable = { + "map[{x:string = x, y:int64 = 1}, " + "{x:string = y, y:int64 = null}, {x:string = z, y:int64 = 9}]", + "null", "map[{x:string = y, y:int64 = 36}]"}; + + auto src_type = + std::make_shared(field("x", utf8(), false), field("y", int64())); + CheckMapToStringCast(map_json, map_json_nullable, expected_str, expected_str_nullable, + src_type); +} + static void CheckStructToStruct( const std::vector>& value_types) { for (const auto& src_value_type : value_types) { From a6d9955b2e7865afe4b94d312f51e9aaa1824c89 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 17:11:36 +0900 Subject: [PATCH 03/12] update code by self-review --- .../compute/kernels/scalar_cast_string.cc | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 3308b7c8622ef..fcc951ff4a0bc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -510,13 +510,30 @@ void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { AddBinaryToFixedSizeBinaryCast(func); } +// ---------------------------------------------------------------------- +// List-like (List, LargeList, ListView, LargeListView, FixedSizeList, Map) to string + template struct ListLikeToStringCastFunctor { using BuilderType = typename TypeTraits::BuilderType; + static Status AppendValue(const ArraySpan& values, std::stringstream& ss, int64_t j, int64_t start) { + if (j != start) { + ss << ", "; + } + if (values.IsValid(j)) { + std::shared_ptr value_scalar; + RETURN_NOT_OK(values.ToArray()->GetScalar(j).Value(&value_scalar)); + ss << value_scalar->ToString(); + } else { + ss << "null"; + } + return Status::OK(); + } + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const ArraySpan& input = batch[0].array; - + auto type_id = input.type->id(); BuilderType builder(ctx->memory_pool()); RETURN_NOT_OK(builder.Reserve(input.length)); @@ -525,7 +542,7 @@ struct ListLikeToStringCastFunctor { const auto* offsets = input.GetValues(1); int list_size = -1; - if (input.type->id() == Type::FIXED_SIZE_LIST) { + if (type_id == Type::FIXED_SIZE_LIST) { list_size = checked_cast(*input.type).list_size(); } @@ -535,11 +552,11 @@ struct ListLikeToStringCastFunctor { continue; } - std::ostringstream ss; + std::stringstream ss; ss << type_info << "["; int64_t start, end; - if (input.type->id() == Type::FIXED_SIZE_LIST) { + if (type_id == Type::FIXED_SIZE_LIST) { start = i * list_size; end = start + list_size; } else { @@ -547,22 +564,8 @@ struct ListLikeToStringCastFunctor { end = offsets[i + 1]; } - auto append_value = [&](int64_t j) -> Status { - if (j != start) { - ss << ", "; - } - if (values.IsValid(j)) { - std::shared_ptr value_scalar; - RETURN_NOT_OK(values.ToArray()->GetScalar(j).Value(&value_scalar)); - ss << value_scalar->ToString(); - } else { - ss << "null"; - } - return Status::OK(); - }; - for (int64_t j = start; j < end; ++j) { - RETURN_NOT_OK(append_value(j)); + RETURN_NOT_OK(AppendValue(values, ss, j, start)); } ss << "]"; From e9db2c391e967b98229bcf6bd9a7d914ee783621 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 17:41:10 +0900 Subject: [PATCH 04/12] apply lint --- cpp/src/arrow/compute/kernels/scalar_cast_string.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index fcc951ff4a0bc..4ddca74ec57c8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -517,7 +517,8 @@ template struct ListLikeToStringCastFunctor { using BuilderType = typename TypeTraits::BuilderType; - static Status AppendValue(const ArraySpan& values, std::stringstream& ss, int64_t j, int64_t start) { + static Status AppendValue(const ArraySpan& values, std::stringstream& ss, int64_t j, + int64_t start) { if (j != start) { ss << ", "; } From c837a7962c3097ac53275f898ded49b1dd68ee4e Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 18:29:08 +0900 Subject: [PATCH 05/12] add test for large string (large_utf8()) --- .../compute/kernels/scalar_cast_string.cc | 1 + .../arrow/compute/kernels/scalar_cast_test.cc | 25 +++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 4ddca74ec57c8..eaabb6666b06e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -633,6 +633,7 @@ std::vector> GetBinaryLikeCasts() { AddDecimalToStringCasts(cast_large_string.get()); AddTemporalToStringCasts(cast_large_string.get()); AddBinaryToBinaryCast(cast_large_string.get()); + AddListLikeToStringCasts(cast_large_string.get()); auto cast_fsb = std::make_shared("cast_fixed_size_binary", Type::FIXED_SIZE_BINARY); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 7475609f9be10..76b9e9db617bf 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2430,10 +2430,12 @@ TEST(Cast, FSLToString) { const std::string& fsl_json, const std::string& expected_str) { std::shared_ptr src = ArrayFromJSON(fsl_type, fsl_json); - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, utf8())); + for (const auto& out_ty : {utf8(), large_utf8()}) { + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); - std::shared_ptr expected_array = ArrayFromJSON(utf8(), expected_str); - ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); + std::shared_ptr expected_array = ArrayFromJSON(out_ty, expected_str); + ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); + } }; // Example with int32 list @@ -2511,10 +2513,11 @@ TEST(Cast, ListToString) { const std::string& list_json, const std::string& expected_str) { std::shared_ptr src = ArrayFromJSON(list_type, list_json); - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, utf8())); - - std::shared_ptr expected_array = ArrayFromJSON(utf8(), expected_str); - ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); + for (const auto& out_ty : {utf8(), large_utf8()}) { + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); + std::shared_ptr expected_array = ArrayFromJSON(out_ty, expected_str); + ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); + } }; // Example with int32 list @@ -2598,9 +2601,11 @@ void CheckMapToStringCast(const std::string& map_json, std::shared_ptr src = ArrayFromJSON(src_type, json); for (int64_t i = 0; i < src->length(); ++i) { ASSERT_OK_AND_ASSIGN(auto scalar, src->GetScalar(i)); - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, utf8())); - ASSERT_EQ(casted_str.scalar()->type->id(), utf8()->id()); - ASSERT_EQ(casted_str.scalar()->ToString(), expected[i]); + for (const auto& out_ty : {utf8(), large_utf8()}) { + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, out_ty)); + ASSERT_EQ(casted_str.scalar()->type->id(), out_ty->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), expected[i]); + } } }; From 0cee6552975ebe90cb08a35d8e8ce8f8098f40e4 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 18:29:58 +0900 Subject: [PATCH 06/12] rollback --- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 76b9e9db617bf..22cdf8ec7b802 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2576,9 +2576,9 @@ TEST(Cast, CastMap) { std::shared_ptr dst_type = map(utf8(), field("y", list(field("b", int64())))); std::shared_ptr src = - ArrayFromJSON(src_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); + ArrayFromJSON(src_type, "[[[\"1\", [1,2,3]]], [[\"2\", [4,5,6]]]]"); std::shared_ptr dst = - ArrayFromJSON(dst_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); + ArrayFromJSON(dst_type, "[[[\"1\", [1,2,3]]], [[\"2\", [4,5,6]]]]"); CheckCast(src, dst); From e3d775d5386715079fc2166430893b88755fee56 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 20:15:30 +0900 Subject: [PATCH 07/12] update unit test for map --- .../arrow/compute/kernels/scalar_cast_test.cc | 114 ++++++++++-------- 1 file changed, 63 insertions(+), 51 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 22cdf8ec7b802..80421939fc878 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2543,16 +2543,15 @@ TEST(Cast, ListToString) { CheckListToStringCast(list_type, nested_list_json, expected_nested_str); } -TEST(Cast, CastMap) { - const std::string map_json = - "[[[\"x\", 1], [\"y\", 8], [\"z\", 9]], [[\"x\", 6]], [[\"y\", 36]]]"; - const std::string map_json_nullable = - "[[[\"x\", 1], [\"y\", null], [\"z\", 9]], null, [[\"y\", 36]]]"; - - auto CheckMapCast = [map_json, - map_json_nullable](const std::shared_ptr& dst_type) { - std::shared_ptr src_type = - std::make_shared(field("x", utf8(), false), field("y", int64())); +class TestMapScalar : public ::testing::Test { + protected: + TestMapScalar() { + src_type = map(utf8(), int64()); + map_json = R"([[["x", 1], ["y", 8], ["z", 9]], [["x", 6]], [["y", 36]]])"; + map_json_nullable = R"([[["x", 1], ["y", null], ["z", 9]], null, [["y", 36]]])"; + } + + void CheckMapCast(const std::shared_ptr& dst_type) { std::shared_ptr src = ArrayFromJSON(src_type, map_json); std::shared_ptr dst = ArrayFromJSON(dst_type, map_json); CheckCast(src, dst); @@ -2560,30 +2559,71 @@ TEST(Cast, CastMap) { src = ArrayFromJSON(src_type, map_json_nullable); dst = ArrayFromJSON(dst_type, map_json_nullable); CheckCast(src, dst); - }; + } + + void CheckMapToStringCast(const std::vector& expected_str, + const std::vector& expected_str_nullable, + const std::shared_ptr& src_type_) { + auto check_cast = [&](const std::string& json, + const std::vector& expected) { + std::shared_ptr src = ArrayFromJSON(src_type_, json); + for (int64_t i = 0; i < src->length(); ++i) { + ASSERT_OK_AND_ASSIGN(auto scalar, src->GetScalar(i)); + for (const auto& out_ty : {utf8(), large_utf8()}) { + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, out_ty)); + ASSERT_EQ(casted_str.scalar()->type->id(), out_ty->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), expected[i]); + } + } + }; + + check_cast(map_json, expected_str); + check_cast(map_json_nullable, expected_str_nullable); + } + protected: + std::string map_json; + std::string map_json_nullable; + std::shared_ptr src_type; +}; + +TEST_F(TestMapScalar, RenameMap) { // Can rename fields CheckMapCast(std::make_shared(field("a", utf8(), false), field("b", int64()))); - // Can map keys and values - CheckMapCast(map(large_utf8(), field("y", int32()))); - // Can cast a map to a to a list> - CheckMapCast(list(struct_({field("a", utf8()), field("b", int64())}))); - // Can cast a map to a large_list> - CheckMapCast(large_list(struct_({field("a", utf8()), field("b", int64())}))); // Can rename nested field names std::shared_ptr src_type = map(utf8(), field("x", list(field("a", int64())))); std::shared_ptr dst_type = map(utf8(), field("y", list(field("b", int64())))); std::shared_ptr src = - ArrayFromJSON(src_type, "[[[\"1\", [1,2,3]]], [[\"2\", [4,5,6]]]]"); + ArrayFromJSON(src_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); std::shared_ptr dst = - ArrayFromJSON(dst_type, "[[[\"1\", [1,2,3]]], [[\"2\", [4,5,6]]]]"); + ArrayFromJSON(dst_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); CheckCast(src, dst); +} + +TEST_F(TestMapScalar, CastMap) { + // Can map keys and values + CheckMapCast(map(large_utf8(), field("y", int32()))); +} + +TEST_F(TestMapScalar, CastList) { + // Can cast a map to a list> + CheckMapCast(list(struct_({field("a", utf8()), field("b", int64())}))); +} + +TEST_F(TestMapScalar, CastLargeList) { + // Can cast a map to a large_list> + CheckMapCast(large_list(struct_({field("a", utf8()), field("b", int64())}))); +} - // Cannot cast to a list> if there are not exactly 2 fields - dst_type = list( +TEST_F(TestMapScalar, CastListWithInvalidFields) { + std::shared_ptr src_type = map(utf8(), field("x", list(field("a", int64())))); + std::shared_ptr src = + ArrayFromJSON(src_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); + + std::shared_ptr dst_type = list( struct_({field("key", int32()), field("value", int64()), field("extra", int64())})); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, @@ -2591,34 +2631,7 @@ TEST(Cast, CastMap) { Cast(src, dst_type)); } -void CheckMapToStringCast(const std::string& map_json, - const std::string& map_json_nullable, - const std::vector& expected_str, - const std::vector& expected_str_nullable, - const std::shared_ptr& src_type) { - auto check_cast = [&](const std::string& json, - const std::vector& expected) { - std::shared_ptr src = ArrayFromJSON(src_type, json); - for (int64_t i = 0; i < src->length(); ++i) { - ASSERT_OK_AND_ASSIGN(auto scalar, src->GetScalar(i)); - for (const auto& out_ty : {utf8(), large_utf8()}) { - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, out_ty)); - ASSERT_EQ(casted_str.scalar()->type->id(), out_ty->id()); - ASSERT_EQ(casted_str.scalar()->ToString(), expected[i]); - } - } - }; - - check_cast(map_json, expected_str); - check_cast(map_json_nullable, expected_str_nullable); -} - -TEST(Cast, MapToString) { - const std::string map_json = - "[[[\"x\", 1], [\"y\", 8], [\"z\", 9]], [[\"x\", 6]], [[\"y\", 36]]]"; - const std::string map_json_nullable = - "[[[\"x\", 1], [\"y\", null], [\"z\", 9]], null, [[\"y\", 36]]]"; - +TEST_F(TestMapScalar, MapToString) { const std::vector expected_str = { "map[{x:string = x, y:int64 = 1}, " "{x:string = y, y:int64 = 8}, {x:string = z, y:int64 = 9}]", @@ -2632,8 +2645,7 @@ TEST(Cast, MapToString) { auto src_type = std::make_shared(field("x", utf8(), false), field("y", int64())); - CheckMapToStringCast(map_json, map_json_nullable, expected_str, expected_str_nullable, - src_type); + CheckMapToStringCast(expected_str, expected_str_nullable, src_type); } static void CheckStructToStruct( From 61351b4a16aab46fbe8bfbd25d84eab646f74fa8 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 20:38:48 +0900 Subject: [PATCH 08/12] update unit test for cast from map to string --- .../arrow/compute/kernels/scalar_cast_test.cc | 51 +++++++------------ 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 80421939fc878..7019682be57dd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2561,24 +2561,13 @@ class TestMapScalar : public ::testing::Test { CheckCast(src, dst); } - void CheckMapToStringCast(const std::vector& expected_str, - const std::vector& expected_str_nullable, - const std::shared_ptr& src_type_) { - auto check_cast = [&](const std::string& json, - const std::vector& expected) { - std::shared_ptr src = ArrayFromJSON(src_type_, json); - for (int64_t i = 0; i < src->length(); ++i) { - ASSERT_OK_AND_ASSIGN(auto scalar, src->GetScalar(i)); - for (const auto& out_ty : {utf8(), large_utf8()}) { - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, out_ty)); - ASSERT_EQ(casted_str.scalar()->type->id(), out_ty->id()); - ASSERT_EQ(casted_str.scalar()->ToString(), expected[i]); - } - } - }; - - check_cast(map_json, expected_str); - check_cast(map_json_nullable, expected_str_nullable); + void CheckStringCast(const std::string& src_str, const std::string& expected_str) { + for (const auto& out_ty : {utf8(), large_utf8()}) { + std::shared_ptr src = ArrayFromJSON(src_type, src_str); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); + ASSERT_EQ(casted_str->type()->id(), out_ty->id()); + ASSERT_EQ(casted_str->ToString(), expected_str); + } } protected: @@ -2632,20 +2621,18 @@ TEST_F(TestMapScalar, CastListWithInvalidFields) { } TEST_F(TestMapScalar, MapToString) { - const std::vector expected_str = { - "map[{x:string = x, y:int64 = 1}, " - "{x:string = y, y:int64 = 8}, {x:string = z, y:int64 = 9}]", - "map[{x:string = x, y:int64 = 6}]", - "map[{x:string = y, y:int64 = 36}]"}; - - const std::vector expected_str_nullable = { - "map[{x:string = x, y:int64 = 1}, " - "{x:string = y, y:int64 = null}, {x:string = z, y:int64 = 9}]", - "null", "map[{x:string = y, y:int64 = 36}]"}; - - auto src_type = - std::make_shared(field("x", utf8(), false), field("y", int64())); - CheckMapToStringCast(expected_str, expected_str_nullable, src_type); + const std::string expected_str = { + "[\n \"map[{key:string = x, value:int64 = 1}, " + "{key:string = y, value:int64 = 8}, {key:string = z, value:int64 = 9}]\"," + "\n \"map[{key:string = x, value:int64 = 6}]\"," + "\n \"map[{key:string = y, value:int64 = 36}]\"\n]"}; + CheckStringCast(map_json, expected_str); + + const std::string expected_str_nullable = { + "[\n \"map[{key:string = x, value:int64 = 1}, " + "{key:string = y, value:int64 = null}, {key:string = z, value:int64 = 9}]\"," + "\n \"null\",\n \"map[{key:string = y, value:int64 = 36}]\"\n]"}; + CheckStringCast(map_json_nullable, expected_str_nullable); } static void CheckStructToStruct( From 37b6457299f7ba56250aa87bc84f8af210e9cb78 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 20:41:41 +0900 Subject: [PATCH 09/12] rename test --- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 7019682be57dd..27db75308a447 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2592,22 +2592,22 @@ TEST_F(TestMapScalar, RenameMap) { CheckCast(src, dst); } -TEST_F(TestMapScalar, CastMap) { +TEST_F(TestMapScalar, CastToMap) { // Can map keys and values CheckMapCast(map(large_utf8(), field("y", int32()))); } -TEST_F(TestMapScalar, CastList) { +TEST_F(TestMapScalar, CastToList) { // Can cast a map to a list> CheckMapCast(list(struct_({field("a", utf8()), field("b", int64())}))); } -TEST_F(TestMapScalar, CastLargeList) { +TEST_F(TestMapScalar, CastToLargeList) { // Can cast a map to a large_list> CheckMapCast(large_list(struct_({field("a", utf8()), field("b", int64())}))); } -TEST_F(TestMapScalar, CastListWithInvalidFields) { +TEST_F(TestMapScalar, CastToListWithInvalidFields) { std::shared_ptr src_type = map(utf8(), field("x", list(field("a", int64())))); std::shared_ptr src = ArrayFromJSON(src_type, R"([[["1", [1,2,3]]], [["2", [4,5,6]]]])"); @@ -2620,7 +2620,7 @@ TEST_F(TestMapScalar, CastListWithInvalidFields) { Cast(src, dst_type)); } -TEST_F(TestMapScalar, MapToString) { +TEST_F(TestMapScalar, CastToString) { const std::string expected_str = { "[\n \"map[{key:string = x, value:int64 = 1}, " "{key:string = y, value:int64 = 8}, {key:string = z, value:int64 = 9}]\"," From 1a076c881a908716d5684b8d3a2014d08c563bcb Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 20:51:41 +0900 Subject: [PATCH 10/12] add TestCastToString class to reuse CheckCastToString function --- .../arrow/compute/kernels/scalar_cast_test.cc | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 27db75308a447..64acc5c917771 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -157,6 +157,19 @@ static std::shared_ptr MaskArrayWithNullsAt(std::shared_ptr input, return MakeArray(masked); } +class TestCastToString : public ::testing::Test { + protected: + void CheckCastToString(const std::shared_ptr& src_type, + const std::string& src_str, const std::string& expected_str) { + for (const auto& out_ty : {utf8(), large_utf8()}) { + std::shared_ptr src = ArrayFromJSON(src_type, src_str); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); + ASSERT_EQ(casted_str->type()->id(), out_ty->id()); + ASSERT_EQ(casted_str->ToString(), expected_str); + } + } +}; + TEST(Cast, CanCast) { auto ExpectCanCast = [](std::shared_ptr from, std::vector> to_set, @@ -2543,37 +2556,28 @@ TEST(Cast, ListToString) { CheckListToStringCast(list_type, nested_list_json, expected_nested_str); } -class TestMapScalar : public ::testing::Test { +class TestMapScalar : public TestCastToString { protected: TestMapScalar() { - src_type = map(utf8(), int64()); + map_type = map(utf8(), int64()); map_json = R"([[["x", 1], ["y", 8], ["z", 9]], [["x", 6]], [["y", 36]]])"; map_json_nullable = R"([[["x", 1], ["y", null], ["z", 9]], null, [["y", 36]]])"; } void CheckMapCast(const std::shared_ptr& dst_type) { - std::shared_ptr src = ArrayFromJSON(src_type, map_json); + std::shared_ptr src = ArrayFromJSON(map_type, map_json); std::shared_ptr dst = ArrayFromJSON(dst_type, map_json); CheckCast(src, dst); - src = ArrayFromJSON(src_type, map_json_nullable); + src = ArrayFromJSON(map_type, map_json_nullable); dst = ArrayFromJSON(dst_type, map_json_nullable); CheckCast(src, dst); } - void CheckStringCast(const std::string& src_str, const std::string& expected_str) { - for (const auto& out_ty : {utf8(), large_utf8()}) { - std::shared_ptr src = ArrayFromJSON(src_type, src_str); - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); - ASSERT_EQ(casted_str->type()->id(), out_ty->id()); - ASSERT_EQ(casted_str->ToString(), expected_str); - } - } - protected: + std::shared_ptr map_type; std::string map_json; std::string map_json_nullable; - std::shared_ptr src_type; }; TEST_F(TestMapScalar, RenameMap) { @@ -2626,13 +2630,13 @@ TEST_F(TestMapScalar, CastToString) { "{key:string = y, value:int64 = 8}, {key:string = z, value:int64 = 9}]\"," "\n \"map[{key:string = x, value:int64 = 6}]\"," "\n \"map[{key:string = y, value:int64 = 36}]\"\n]"}; - CheckStringCast(map_json, expected_str); + CheckCastToString(map_type, map_json, expected_str); const std::string expected_str_nullable = { "[\n \"map[{key:string = x, value:int64 = 1}, " "{key:string = y, value:int64 = null}, {key:string = z, value:int64 = 9}]\"," "\n \"null\",\n \"map[{key:string = y, value:int64 = 36}]\"\n]"}; - CheckStringCast(map_json_nullable, expected_str_nullable); + CheckCastToString(map_type, map_json_nullable, expected_str_nullable); } static void CheckStructToStruct( From affc2cf91809281d2b1c89ef2f5965076d518f07 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 21:38:01 +0900 Subject: [PATCH 11/12] use TestCastToString class for FSL and List --- .../arrow/compute/kernels/scalar_cast_test.cc | 107 +++++++----------- 1 file changed, 42 insertions(+), 65 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 64acc5c917771..df848adf1a96b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -159,10 +159,11 @@ static std::shared_ptr MaskArrayWithNullsAt(std::shared_ptr input, class TestCastToString : public ::testing::Test { protected: - void CheckCastToString(const std::shared_ptr& src_type, - const std::string& src_str, const std::string& expected_str) { + static void CheckCastToString(const std::shared_ptr& src_type, + const std::string& src_str, + const std::string& expected_str) { + std::shared_ptr src = ArrayFromJSON(src_type, src_str); for (const auto& out_ty : {utf8(), large_utf8()}) { - std::shared_ptr src = ArrayFromJSON(src_type, src_str); ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); ASSERT_EQ(casted_str->type()->id(), out_ty->id()); ASSERT_EQ(casted_str->ToString(), expected_str); @@ -2438,40 +2439,26 @@ TEST(Cast, FSLToList) { CheckCast(fsl_int32, ArrayFromJSON(fixed_size_list(int16(), 1), "[[32689]]"), options); } -TEST(Cast, FSLToString) { - auto CheckFSLToStringCast = [](const std::shared_ptr& fsl_type, - const std::string& fsl_json, - const std::string& expected_str) { - std::shared_ptr src = ArrayFromJSON(fsl_type, fsl_json); - for (const auto& out_ty : {utf8(), large_utf8()}) { - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); - - std::shared_ptr expected_array = ArrayFromJSON(out_ty, expected_str); - ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); - } - }; - +TEST_F(TestCastToString, FSLToString) { // Example with int32 list std::shared_ptr fsl_type = fixed_size_list(int32(), 3); const std::string fsl_json = R"([[1, 2, 3], [4, 5, 6], [7, null, 8], null])"; - const std::string expected_str = - "[\"fixed_size_list[3][1, 2, 3]\", " - "\"fixed_size_list[3][4, 5, 6]\", " - "\"fixed_size_list[3][7, null, 8]\", " - "\"null\"]"; - - CheckFSLToStringCast(fsl_type, fsl_json, expected_str); + const std::string expected_str = R"([ + "fixed_size_list[3][1, 2, 3]", + "fixed_size_list[3][4, 5, 6]", + "fixed_size_list[3][7, null, 8]", + "null" +])"; + CheckCastToString(fsl_type, fsl_json, expected_str); // Example with nested fixed_size_list of size 2 fsl_type = fixed_size_list(fixed_size_list(int32(), 2), 2); const std::string nested_fsl_json = R"([[[1, 2], [3, 4]], [[null, 5], null]])"; - const std::string expected_nested_str = - "[\"fixed_size_list[2]>[2]" - "[fixed_size_list[2][1, 2], fixed_size_list[2][3, 4]]\", " - "\"fixed_size_list[2]>[2]" - "[fixed_size_list[2][null, 5], null]\"]"; - - CheckFSLToStringCast(fsl_type, nested_fsl_json, expected_nested_str); + const std::string expected_nested_str = R"([ + "fixed_size_list[2]>[2][fixed_size_list[2][1, 2], fixed_size_list[2][3, 4]]", + "fixed_size_list[2]>[2][fixed_size_list[2][null, 5], null]" +])"; + CheckCastToString(fsl_type, nested_fsl_json, expected_nested_str); } TEST(Cast, ListToFSL) { @@ -2521,39 +2508,28 @@ TEST(Cast, ListToFSL) { CastOptions::Safe(fixed_size_list(int32(), 3)))); } -TEST(Cast, ListToString) { - auto CheckListToStringCast = [](const std::shared_ptr& list_type, - const std::string& list_json, - const std::string& expected_str) { - std::shared_ptr src = ArrayFromJSON(list_type, list_json); - for (const auto& out_ty : {utf8(), large_utf8()}) { - ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(*src, out_ty)); - std::shared_ptr expected_array = ArrayFromJSON(out_ty, expected_str); - ASSERT_TRUE(casted_str->Equals(expected_array)) << casted_str->ToString(); - } - }; - +TEST_F(TestCastToString, ListToString) { // Example with int32 list std::shared_ptr list_type = list(int32()); const std::string list_json = R"([[1, 2, 3], [4, 5], [6], []])"; - const std::string expected_str = - R"(["list[1, 2, 3]", - "list[4, 5]", - "list[6]", - "list[]"])"; - - CheckListToStringCast(list_type, list_json, expected_str); + const std::string expected_str = R"([ + "list[1, 2, 3]", + "list[4, 5]", + "list[6]", + "list[]" +])"; + CheckCastToString(list_type, list_json, expected_str); // Example with nested list of int32 list_type = list(list(int32())); const std::string nested_list_json = R"([[[1, 2], [3, 4]], [[5], [6, 7]], [[]], []])"; - const std::string expected_nested_str = - R"(["list>[list[1, 2], list[3, 4]]", - "list>[list[5], list[6, 7]]", - "list>[list[]]", - "list>[]"])"; - - CheckListToStringCast(list_type, nested_list_json, expected_nested_str); + const std::string expected_nested_str = R"([ + "list>[list[1, 2], list[3, 4]]", + "list>[list[5], list[6, 7]]", + "list>[list[]]", + "list>[]" +])"; + CheckCastToString(list_type, nested_list_json, expected_nested_str); } class TestMapScalar : public TestCastToString { @@ -2625,17 +2601,18 @@ TEST_F(TestMapScalar, CastToListWithInvalidFields) { } TEST_F(TestMapScalar, CastToString) { - const std::string expected_str = { - "[\n \"map[{key:string = x, value:int64 = 1}, " - "{key:string = y, value:int64 = 8}, {key:string = z, value:int64 = 9}]\"," - "\n \"map[{key:string = x, value:int64 = 6}]\"," - "\n \"map[{key:string = y, value:int64 = 36}]\"\n]"}; + const std::string expected_str = R"([ + "map[{key:string = x, value:int64 = 1}, {key:string = y, value:int64 = 8}, {key:string = z, value:int64 = 9}]", + "map[{key:string = x, value:int64 = 6}]", + "map[{key:string = y, value:int64 = 36}]" +])"; CheckCastToString(map_type, map_json, expected_str); - const std::string expected_str_nullable = { - "[\n \"map[{key:string = x, value:int64 = 1}, " - "{key:string = y, value:int64 = null}, {key:string = z, value:int64 = 9}]\"," - "\n \"null\",\n \"map[{key:string = y, value:int64 = 36}]\"\n]"}; + const std::string expected_str_nullable = R"([ + "map[{key:string = x, value:int64 = 1}, {key:string = y, value:int64 = null}, {key:string = z, value:int64 = 9}]", + "null", + "map[{key:string = y, value:int64 = 36}]" +])"; CheckCastToString(map_type, map_json_nullable, expected_str_nullable); } From f1205856e4ff277d8c69ab6536db135858035105 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 22:05:37 +0900 Subject: [PATCH 12/12] update ListToString test case for large_list, list_view, and large_list_view --- .../arrow/compute/kernels/scalar_cast_test.cc | 59 +++++++++++++------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index df848adf1a96b..e61d9a73842d1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2509,27 +2509,50 @@ TEST(Cast, ListToFSL) { } TEST_F(TestCastToString, ListToString) { - // Example with int32 list - std::shared_ptr list_type = list(int32()); - const std::string list_json = R"([[1, 2, 3], [4, 5], [6], []])"; - const std::string expected_str = R"([ - "list[1, 2, 3]", - "list[4, 5]", - "list[6]", + // Example with int32 list, large list, list view and large list view + const std::vector, std::string>> list_types = { + {list(int32()), R"([ + "list[1, 2]", + "list[3]", "list[]" -])"; - CheckCastToString(list_type, list_json, expected_str); - - // Example with nested list of int32 - list_type = list(list(int32())); - const std::string nested_list_json = R"([[[1, 2], [3, 4]], [[5], [6, 7]], [[]], []])"; - const std::string expected_nested_str = R"([ - "list>[list[1, 2], list[3, 4]]", - "list>[list[5], list[6, 7]]", +])"}, + {large_list(int32()), R"([ + "large_list[1, 2]", + "large_list[3]", + "large_list[]" +])"}, + {list_view(int32()), R"([ + "list_view[1, 2]", + "list_view[3]", + "list_view[]" +])"}, + {large_list_view(int32()), R"([ + "large_list_view[1, 2]", + "large_list_view[3]", + "large_list_view[]" +])"}}; + + const std::string list_json = R"([[1, 2], [3], []])"; + + for (const auto& [list_type, expected_str] : list_types) { + CheckCastToString(list_type, list_json, expected_str); + } + + // Example with nested list of int32. To avoid further code duplication, the code for + // large_list, list_view, and large_list_view is omitted. + const std::vector, std::string>> nested_list_types = + {{list(list(int32())), R"([ + "list>[list[1, 2], list[3]]", + "list>[list[4]]", "list>[list[]]", "list>[]" -])"; - CheckCastToString(list_type, nested_list_json, expected_nested_str); +])"}}; + + const std::string nested_list_json = R"([[[1, 2], [3]], [[4]], [[]], []])"; + + for (const auto& [nested_list_type, expected_nested_str] : nested_list_types) { + CheckCastToString(nested_list_type, nested_list_json, expected_nested_str); + } } class TestMapScalar : public TestCastToString {