From 361d6bc0fdb86a5076ec517994ed997440051bbe Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 26 May 2024 14:10:44 +0900 Subject: [PATCH] support cast from list-like to string --- .../compute/kernels/scalar_cast_string.cc | 85 +++++++++++++++++++ cpp/src/arrow/scalar_test.cc | 16 ++-- 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index dc3fe29a3dfae..f83e658fe9404 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -510,6 +510,90 @@ void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { AddBinaryToFixedSizeBinaryCast(func); } +template +struct ListLikeToStringCastFunctor { + using BuilderType = typename TypeTraits::BuilderType; + + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& input = batch[0].array; + + BuilderType builder(ctx->memory_pool()); + RETURN_NOT_OK(builder.Reserve(input.length)); + + std::string type_info = input.type->ToString(true); + const ArraySpan& values = input.child_data[0]; + const auto* offsets = input.GetValues(1); + + int list_size = -1; + if (input.type->id() == Type::FIXED_SIZE_LIST) { + list_size = checked_cast(*input.type).list_size(); + } + + for (int64_t i = 0; i < input.length; ++i) { + if (!input.IsValid(i)) { + RETURN_NOT_OK(builder.Append("null")); + continue; + } + + std::ostringstream ss; + ss << type_info << "["; + + int64_t start, end; + if (input.type->id() == Type::FIXED_SIZE_LIST) { + start = i * list_size; + end = start + list_size; + } else { + start = offsets[i]; + end = offsets[i + 1]; + } + + for (int64_t j = start; j < end; ++j) { + if (j != start) { + ss << ", "; + } + if (values.IsValid(j)) { + ss << std::to_string(values.GetValues(1)[j]); + } else { + ss << "null"; + } + } + ss << "]"; + RETURN_NOT_OK(builder.Append(ss.str())); + } + + std::shared_ptr output_array; + RETURN_NOT_OK(builder.Finish(&output_array)); + out->value = output_array->data(); + return Status::OK(); + } +}; + +template +void AddListLikeToStringCasts(CastFunction* func) { + auto out_ty = TypeTraits::type_singleton(); + + DCHECK_OK(func->AddKernel(Type::LIST, {InputType(Type::LIST)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::LARGE_LIST, {InputType(Type::LARGE_LIST)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::LIST_VIEW, {InputType(Type::LIST_VIEW)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::LARGE_LIST_VIEW, {InputType(Type::LARGE_LIST_VIEW)}, + out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::FIXED_SIZE_LIST, {InputType(Type::FIXED_SIZE_LIST)}, + out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::MAP, {InputType(Type::MAP)}, out_ty, + ListLikeToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); +} + } // namespace std::vector> GetBinaryLikeCasts() { @@ -528,6 +612,7 @@ std::vector> GetBinaryLikeCasts() { AddDecimalToStringCasts(cast_string.get()); AddTemporalToStringCasts(cast_string.get()); AddBinaryToBinaryCast(cast_string.get()); + AddListLikeToStringCasts(cast_string.get()); auto cast_large_string = std::make_shared("cast_large_string", Type::LARGE_STRING); diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index 104a5697b5727..ce271fdcf19d5 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -1292,11 +1292,9 @@ class TestListLikeScalar : public ::testing::Test { auto invalid_cast_type = fixed_size_list(value_->type(), 5); CheckListCastError(scalar, invalid_cast_type); - // Cast() function doesn't support casting list-like to string, use Scalar::CastTo() - // instead. - ASSERT_OK_AND_ASSIGN(auto casted_str, scalar.CastTo(utf8())); - ASSERT_EQ(casted_str->type->id(), utf8()->id()); - ASSERT_EQ(casted_str->ToString(), scalar.ToString()); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, utf8())); + ASSERT_EQ(casted_str.scalar()->type->id(), utf8()->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), scalar.ToString()); } protected: @@ -1337,11 +1335,9 @@ TEST(TestFixedSizeListScalar, Cast) { auto invalid_cast_type = fixed_size_list(int16(), 4); CheckListCastError(scalar, invalid_cast_type); - // Cast() function doesn't support casting list-like to string, use Scalar::CastTo() - // instead. - ASSERT_OK_AND_ASSIGN(auto casted_str, scalar.CastTo(utf8())); - ASSERT_EQ(casted_str->type->id(), utf8()->id()); - ASSERT_EQ(casted_str->ToString(), scalar.ToString()); + ASSERT_OK_AND_ASSIGN(auto casted_str, Cast(scalar, utf8())); + ASSERT_EQ(casted_str.scalar()->type->id(), utf8()->id()); + ASSERT_EQ(casted_str.scalar()->ToString(), scalar.ToString()); } TEST(TestMapScalar, Basics) {