diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index 9f66b91f..5e5e72e7 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -25,14 +25,9 @@ ColumnArray::ColumnArray(ColumnArray&& other) } void ColumnArray::AppendAsColumn(ColumnRef array) { - if (!data_->Type()->IsEqual(array->Type())) { - throw ValidationError( - "can't append column of type " + array->Type()->GetName() + " " - "to column type " + data_->Type()->GetName()); - } - - AddOffset(array->Size()); + // appending data may throw (i.e. due to ype check failure), so do it first to avoid partly modified state. data_->Append(array); + AddOffset(array->Size()); } ColumnRef ColumnArray::GetAsColumn(size_t n) const { @@ -59,10 +54,6 @@ ColumnRef ColumnArray::CloneEmpty() const { void ColumnArray::Append(ColumnRef column) { if (auto col = column->As()) { - if (!col->data_->Type()->IsEqual(data_->Type())) { - return; - } - for (size_t i = 0; i < col->Size(); ++i) { AppendAsColumn(col->GetAsColumn(i)); } diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 13bf17a1..c0c12319 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -227,12 +227,21 @@ ColumnRef ColumnLowCardinality::GetDictionary() { } void ColumnLowCardinality::Append(ColumnRef col) { + // Append values from col only if it is either + // - exactly same type as `this`: LowCardinality wrapping same dictionary type + // - same type as dictionary column + auto c = col->As(); - if (!c || !dictionary_column_->Type()->IsEqual(c->dictionary_column_->Type())) - return; + // If not LowCardinality of same dictionary type + if (!c || !dictionary_column_->Type()->IsEqual(c->dictionary_column_->Type())) { + // If not column of the same type as dictionary type + if (!dictionary_column_->Type()->IsEqual(col->GetType())) { + return; + } + } - for (size_t i = 0; i < c->Size(); ++i) { - AppendUnsafe(c->GetItem(i)); + for (size_t i = 0; i < col->Size(); ++i) { + AppendUnsafe(col->GetItem(i)); } } diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp index ace1f79d..6e5668e6 100644 --- a/ut/Column_ut.cpp +++ b/ut/Column_ut.cpp @@ -16,6 +16,9 @@ #include #include +#include +#include +#include #include "utils.h" #include "roundtrip_column.h" @@ -46,10 +49,12 @@ std::ostream& operator<<(std::ostream& ostr, const Type::Code& type_code) { template (*CreatorFunction)(), typename GeneratorValueType, - typename std::vector (*GeneratorFunc)()> + typename std::vector (*GeneratorFunction)()> struct GenericColumnTestCase { using ColumnType = ColumnTypeT; + static constexpr auto Creator = CreatorFunction; + static constexpr auto Generator = GeneratorFunction; static auto createColumn() { @@ -58,7 +63,7 @@ struct GenericColumnTestCase static auto generateValues() { - return GeneratorFunc(); + return GeneratorFunction(); } }; @@ -92,7 +97,7 @@ class GenericColumnTest : public testing::Test { return std::tuple{column, values}; } - static std::optional SkipTest(clickhouse::Client& client) { + static std::optional CheckIfShouldSkipTest(clickhouse::Client& client) { if constexpr (std::is_same_v) { // Date32 first appeared in v21.9.2.17-stable const auto server_info = client.GetServerInfo(); @@ -113,6 +118,33 @@ class GenericColumnTest : public testing::Test { } return std::nullopt; } + + template + static void TestColumnRoundtrip(const std::shared_ptr & column, const ClientOptions & client_options) + { + SCOPED_TRACE(::testing::Message("Column type: ") << column->GetType().GetName()); + SCOPED_TRACE(::testing::Message("Client options: ") << client_options); + + clickhouse::Client client(client_options); + + if (auto message = CheckIfShouldSkipTest(client)) { + GTEST_SKIP() << *message; + } + + auto result_typed = RoundtripColumnValues(client, column)->template AsStrict(); + EXPECT_TRUE(CompareRecursive(*column, *result_typed)); + } + + + template + static void TestColumnRoundtrip(const ColumnType & column, const ClientOptions & client_options, CompressionMethods && compression_methods) + { + for (auto compressionMethod : compression_methods) + { + ClientOptions new_options = ClientOptions(client_options).SetCompressionMethod(compressionMethod); + TestColumnRoundtrip(column, new_options); + } + } }; // Luckily all (non-data copying/moving) constructors have size_t params. @@ -184,7 +216,17 @@ using TestCases = ::testing::Types< DecimalColumnTestCase, DecimalColumnTestCase, - DecimalColumnTestCase + DecimalColumnTestCase, + + GenericColumnTestCase, &makeColumn>, std::string, &MakeStrings> + + // Array(String) +// GenericColumnTestCase, &makeColumn>, std::vector, &MakeArrays> + +// // Array(Array(String)) +// GenericColumnTestCase>, &makeColumn>>, +// std::vector>, +// &MakeArrays, &MakeArrays>> >; TYPED_TEST_SUITE(GenericColumnTest, TestCases); @@ -222,7 +264,7 @@ TYPED_TEST(GenericColumnTest, EmptyColumn) { TYPED_TEST(GenericColumnTest, Append) { auto column = this->MakeColumn(); - const auto values = this->GenerateValues(100); + const auto values = this->GenerateValues(10'000); for (const auto & v : values) { EXPECT_NO_THROW(column->Append(v)); @@ -259,10 +301,17 @@ inline auto convertValueForGetItem(const ColumnType& col, ValueType&& t) { } TYPED_TEST(GenericColumnTest, GetItem) { - auto [column, values] = this->MakeColumnWithValues(100); + auto [column, values] = this->MakeColumnWithValues(10'000); ASSERT_EQ(values.size(), column->Size()); - ASSERT_EQ(column->GetItem(0).type, column->GetType().GetCode()); + const auto wrapping_types = std::set{ + Type::Code::LowCardinality, Type::Code::Array, Type::Code::Nullable + }; + + // For wrapping types, type of ItemView can be different from type of column + if (wrapping_types.find(column->GetType().GetCode()) == wrapping_types.end() ) { + EXPECT_EQ(column->GetItem(0).type, column->GetType().GetCode()); + } for (size_t i = 0; i < values.size(); ++i) { const auto v = convertValueForGetItem(*column, values[i]); @@ -274,7 +323,7 @@ TYPED_TEST(GenericColumnTest, GetItem) { } TYPED_TEST(GenericColumnTest, Slice) { - auto [column, values] = this->MakeColumnWithValues(100); + auto [column, values] = this->MakeColumnWithValues(10'000); auto untyped_slice = column->Slice(0, column->Size()); auto slice = untyped_slice->template AsStrict(); @@ -286,7 +335,7 @@ TYPED_TEST(GenericColumnTest, Slice) { } TYPED_TEST(GenericColumnTest, CloneEmpty) { - auto [column, values] = this->MakeColumnWithValues(100); + auto [column, values] = this->MakeColumnWithValues(10'000); EXPECT_EQ(values.size(), column->Size()); auto clone_untyped = column->CloneEmpty(); @@ -298,7 +347,7 @@ TYPED_TEST(GenericColumnTest, CloneEmpty) { } TYPED_TEST(GenericColumnTest, Clear) { - auto [column, values] = this->MakeColumnWithValues(100); + auto [column, values] = this->MakeColumnWithValues(10'000); EXPECT_EQ(values.size(), column->Size()); column->Clear(); @@ -306,7 +355,7 @@ TYPED_TEST(GenericColumnTest, Clear) { } TYPED_TEST(GenericColumnTest, Swap) { - auto [column_A, values] = this->MakeColumnWithValues(100); + auto [column_A, values] = this->MakeColumnWithValues(10'000); auto column_B = this->MakeColumn(); column_A->Swap(*column_B); @@ -318,18 +367,21 @@ TYPED_TEST(GenericColumnTest, Swap) { TYPED_TEST(GenericColumnTest, LoadAndSave) { auto [column_A, values] = this->MakeColumnWithValues(100); - char buffer[4096] = {'\0'}; + // large buffer since we have pretty big values for String column + auto const BufferSize = 10*1024*1024; + std::unique_ptr buffer = std::make_unique(BufferSize); + memset(buffer.get(), 0, BufferSize); { - ArrayOutput output(buffer, sizeof(buffer)); + ArrayOutput output(buffer.get(), BufferSize); // Save - EXPECT_NO_THROW(column_A->Save(&output)); + ASSERT_NO_THROW(column_A->Save(&output)); } auto column_B = this->MakeColumn(); { - ArrayInput input(buffer, sizeof(buffer)); + ArrayInput input(buffer.get(), BufferSize); // Load - EXPECT_TRUE(column_B->Load(&input, values.size())); + ASSERT_TRUE(column_B->Load(&input, values.size())); } EXPECT_TRUE(CompareRecursive(*column_A, *column_B)); @@ -342,25 +394,28 @@ const auto LocalHostEndpoint = ClientOptions() .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) .SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default")); +const auto AllCompressionMethods = { + clickhouse::CompressionMethod::None, + clickhouse::CompressionMethod::LZ4 +}; + TYPED_TEST(GenericColumnTest, RoundTrip) { - auto [column, values] = this->MakeColumnWithValues(100); + auto [column, values] = this->MakeColumnWithValues(10'000); EXPECT_EQ(values.size(), column->Size()); - clickhouse::Client client(LocalHostEndpoint); - - if (auto message = this->SkipTest(client)) { - GTEST_SKIP() << *message; - } - - auto result_typed = RoundtripColumnValues(client, column)->template AsStrict(); - EXPECT_TRUE(CompareRecursive(*column, *result_typed)); + this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods); } -TYPED_TEST(GenericColumnTest, NulableT_RoundTrip) { +TYPED_TEST(GenericColumnTest, NullableT_RoundTrip) { using NullableType = ColumnNullableT; - auto column = std::make_shared(this->MakeColumn()); - auto values = this->GenerateValues(100); + auto non_nullable_column = this->MakeColumn(); + if (non_nullable_column->GetType().GetCode() == Type::Code::LowCardinality) + // TODO (vnemkov): wrap as ColumnLowCardinalityT> instead of ColumnNullableT> + GTEST_SKIP() << "Can't have " << non_nullable_column->GetType().GetName() << " in Nullable"; + + auto column = std::make_shared(std::move(non_nullable_column)); + auto values = this->GenerateValues(10'000); FromVectorGenerator is_null({true, false}); for (size_t i = 0; i < values.size(); ++i) { @@ -371,12 +426,24 @@ TYPED_TEST(GenericColumnTest, NulableT_RoundTrip) { } } - clickhouse::Client client(LocalHostEndpoint); + this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods); +} + +TYPED_TEST(GenericColumnTest, ArrayT_RoundTrip) { + using ColumnArrayType = ColumnArrayT; - if (auto message = this->SkipTest(client)) { - GTEST_SKIP() << *message; + auto [nested_column, values] = this->MakeColumnWithValues(100); + + auto column = std::make_shared(nested_column->CloneEmpty()->template As()); + for (size_t i = 0; i < values.size(); ++i) + { + const std::vector> row{values.begin(), values.begin() + i}; + column->Append(values.begin(), values.begin() + i); + + EXPECT_TRUE(CompareRecursive(row, (*column)[column->Size() - 1])); } + EXPECT_EQ(values.size(), column->Size()); - auto result_typed = WrapColumn(RoundtripColumnValues(client, column)); - EXPECT_TRUE(CompareRecursive(*column, *result_typed)); + this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods); } + diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index ce477554..152cf65f 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -105,9 +105,10 @@ TEST(ColumnsCase, FixedString_Append_LargeString) { } TEST(ColumnsCase, StringInit) { - auto col = std::make_shared(MakeStrings()); + auto values = MakeStrings(); + auto col = std::make_shared(values); - ASSERT_EQ(col->Size(), 4u); + ASSERT_EQ(col->Size(), values.size()); ASSERT_EQ(col->At(1), "ab"); ASSERT_EQ(col->At(3), "abcd"); } diff --git a/ut/roundtrip_column.cpp b/ut/roundtrip_column.cpp index c4685a3b..5af9624a 100644 --- a/ut/roundtrip_column.cpp +++ b/ut/roundtrip_column.cpp @@ -4,28 +4,49 @@ #include #include +#include +#include "clickhouse/columns/numeric.h" namespace { using namespace clickhouse; + +template +std::vector GenerateConsecutiveNumbers(size_t count, T start = 0) +{ + std::vector result; + result.reserve(count); + + T value = start; + for (size_t i = 0; i < count; ++i, ++value) + { + result.push_back(value); + } + + return result; +} + } + ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) { - // Create a temporary table with a single column - // insert values from `expected` - // select and aggregate all values from block into `result` column + // Create a temporary table with a corresponding data column + // INSERT values from `expected` + // SELECT and collect all values from block into `result` column auto result = expected->CloneEmpty(); const std::string type_name = result->GetType().GetName(); client.Execute("DROP TEMPORARY TABLE IF EXISTS temporary_roundtrip_table;"); - client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (col " + type_name + ");"); + // id column is to have the same order of rows on SELECT + client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (id UInt32, col " + type_name + ");"); { Block block; block.AppendColumn("col", expected); + block.AppendColumn("id", std::make_shared(GenerateConsecutiveNumbers(expected->Size()))); block.RefreshRowCount(); client.Insert("temporary_roundtrip_table", block); } - client.Select("SELECT col FROM temporary_roundtrip_table", [&result](const Block& b) { + client.Select("SELECT col FROM temporary_roundtrip_table ORDER BY id", [&result](const Block& b) { if (b.GetRowCount() == 0) return; diff --git a/ut/utils.h b/ut/utils.h index 41bec25c..b4b8e92d 100644 --- a/ut/utils.h +++ b/ut/utils.h @@ -167,7 +167,10 @@ std::ostream& operator<<(std::ostream & ostr, const PrintContainer& print_con for (auto i = std::begin(container); i != std::end(container); /*intentionally no ++i*/) { const auto & elem = *i; - if constexpr (is_container_v>) { + if constexpr (is_string_v) { + ostr << '"' << elem << '"'; + } + else if constexpr (is_container_v>) { ostr << PrintContainer{elem}; } else { ostr << elem; diff --git a/ut/utils_comparison.h b/ut/utils_comparison.h index 0b7f805a..e500e4f3 100644 --- a/ut/utils_comparison.h +++ b/ut/utils_comparison.h @@ -143,7 +143,8 @@ struct PrintContainer; template ::testing::AssertionResult CompareRecursive(const Left & left, const Right & right) { - if constexpr ((is_container_v || std::is_base_of_v>) + if constexpr (!is_string_v && !is_string_v + && (is_container_v || std::is_base_of_v>) && (is_container_v || std::is_base_of_v>) ) { const auto & l = maybeWrapColumnAsContainer(left); diff --git a/ut/utils_meta.h b/ut/utils_meta.h index e069dc5f..3c50da4e 100644 --- a/ut/utils_meta.h +++ b/ut/utils_meta.h @@ -2,6 +2,8 @@ #include #include // for std::begin +#include +#include // based on https://stackoverflow.com/a/31207079 template @@ -46,3 +48,6 @@ struct is_instantiation_of : std::false_type {}; template < template class Template, typename... Args > struct is_instantiation_of< Template, Template > : std::true_type {}; + +template +inline constexpr bool is_string_v = std::is_same_v> || std::is_same_v>; diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp index f69e1daa..bd015751 100644 --- a/ut/utils_ut.cpp +++ b/ut/utils_ut.cpp @@ -1,4 +1,5 @@ #include +#include "ut/value_generators.h" #include "utils.h" #include @@ -113,3 +114,9 @@ TEST(CompareRecursive, OptionalNan) { // EXPECT_FALSE(CompareRecursive(NaNdo, std::nullopt)); // EXPECT_FALSE(CompareRecursive(NaNfo, std::nullopt)); } + + +TEST(Generators, MakeArrays) { + auto arrays = MakeArrays(); + ASSERT_LT(0u, arrays.size()); +} diff --git a/ut/value_generators.cpp b/ut/value_generators.cpp index a0316a3a..f6d7baf9 100644 --- a/ut/value_generators.cpp +++ b/ut/value_generators.cpp @@ -17,7 +17,7 @@ std::vector MakeBools() { } std::vector MakeFixedStrings(size_t string_size) { - std::vector result {"aaa", "bbb", "ccc", "ddd"}; + std::vector result = MakeStrings(); std::for_each(result.begin(), result.end(), [string_size](auto& value) { value.resize(string_size, '\0'); @@ -27,7 +27,28 @@ std::vector MakeFixedStrings(size_t string_size) { } std::vector MakeStrings() { - return {"a", "ab", "abc", "abcd"}; + return { + "a", "ab", "abc", "abcd", + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + "long string to test how those are handled. Here goes more text. " + }; } std::vector MakeUUIDs() { diff --git a/ut/value_generators.h b/ut/value_generators.h index 8ef21d6e..031f0fe9 100644 --- a/ut/value_generators.h +++ b/ut/value_generators.h @@ -122,6 +122,20 @@ inline auto MakeDates() { return result; } +template (*Generator)()> +inline auto MakeArrays() { + const auto nested_values = Generator(); + std::vector> result; + result.reserve(nested_values.size()); + + for (size_t i = 0; i < nested_values.size(); ++i) + { + result.emplace_back(nested_values.begin(), nested_values.begin() + i); + } + + return result; +} + std::string FooBarGenerator(size_t i);