Skip to content

Commit

Permalink
Merge pull request #331 from Enmk/fix_Array_of_LowCardinality
Browse files Browse the repository at this point in the history
Fix `ColumnArrayT<ColumnLowCardinalityT<ColumnString>>::Append`
  • Loading branch information
Enmk authored Sep 21, 2023
2 parents b297a6e + 7d83db4 commit e2ac63c
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 59 deletions.
13 changes: 2 additions & 11 deletions clickhouse/columns/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,9 @@ ColumnArray::ColumnArray(ColumnArray&& other)
}

void ColumnArray::AppendAsColumn(ColumnRef array) {
if (!data_->Type()->IsEqual(array->Type())) {
throw ValidationError(
"can't append column of type " + array->Type()->GetName() + " "
"to column type " + data_->Type()->GetName());
}

AddOffset(array->Size());
// appending data may throw (e.g. due to type check failure), so do it first to avoid partly modified state.
data_->Append(array);
AddOffset(array->Size());
}

ColumnRef ColumnArray::GetAsColumn(size_t n) const {
Expand All @@ -59,10 +54,6 @@ ColumnRef ColumnArray::CloneEmpty() const {

void ColumnArray::Append(ColumnRef column) {
if (auto col = column->As<ColumnArray>()) {
if (!col->data_->Type()->IsEqual(data_->Type())) {
return;
}

for (size_t i = 0; i < col->Size(); ++i) {
AppendAsColumn(col->GetAsColumn(i));
}
Expand Down
17 changes: 13 additions & 4 deletions clickhouse/columns/lowcardinality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,21 @@ ColumnRef ColumnLowCardinality::GetDictionary() {
}

void ColumnLowCardinality::Append(ColumnRef col) {
// Append values from col only if it is either
// - exactly same type as `this`: LowCardinality wrapping same dictionary type
// - same type as dictionary column

auto c = col->As<ColumnLowCardinality>();
if (!c || !dictionary_column_->Type()->IsEqual(c->dictionary_column_->Type()))
return;
// If not LowCardinality of same dictionary type
if (!c || !dictionary_column_->Type()->IsEqual(c->dictionary_column_->Type())) {
// If not column of the same type as dictionary type
if (!dictionary_column_->Type()->IsEqual(col->GetType())) {
return;
}
}

for (size_t i = 0; i < c->Size(); ++i) {
AppendUnsafe(c->GetItem(i));
for (size_t i = 0; i < col->Size(); ++i) {
AppendUnsafe(col->GetItem(i));
}
}

Expand Down
133 changes: 100 additions & 33 deletions ut/Column_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#include <clickhouse/client.h>

#include <gtest/gtest.h>
#include <initializer_list>
#include <memory>
#include <type_traits>

#include "utils.h"
#include "roundtrip_column.h"
Expand Down Expand Up @@ -46,10 +49,12 @@ std::ostream& operator<<(std::ostream& ostr, const Type::Code& type_code) {
template <typename ColumnTypeT,
typename std::shared_ptr<ColumnTypeT> (*CreatorFunction)(),
typename GeneratorValueType,
typename std::vector<GeneratorValueType> (*GeneratorFunc)()>
typename std::vector<GeneratorValueType> (*GeneratorFunction)()>
struct GenericColumnTestCase
{
using ColumnType = ColumnTypeT;
static constexpr auto Creator = CreatorFunction;
static constexpr auto Generator = GeneratorFunction;

static auto createColumn()
{
Expand All @@ -58,7 +63,7 @@ struct GenericColumnTestCase

static auto generateValues()
{
return GeneratorFunc();
return GeneratorFunction();
}
};

Expand Down Expand Up @@ -92,7 +97,7 @@ class GenericColumnTest : public testing::Test {
return std::tuple{column, values};
}

static std::optional<std::string> SkipTest(clickhouse::Client& client) {
static std::optional<std::string> CheckIfShouldSkipTest(clickhouse::Client& client) {
if constexpr (std::is_same_v<ColumnType, ColumnDate32>) {
// Date32 first appeared in v21.9.2.17-stable
const auto server_info = client.GetServerInfo();
Expand All @@ -113,6 +118,33 @@ class GenericColumnTest : public testing::Test {
}
return std::nullopt;
}

// Round-trips `column` through RoundtripColumnValues() using a client built from
// `client_options`, then expects the column that comes back to compare equal to the original.
// The test is skipped when CheckIfShouldSkipTest() returns a message for that client.
//
// Note: the template parameter is the type of the column actually passed in; callers may
// pass a wrapped column (e.g. ColumnNullableT<...> or ColumnArrayT<...>).
template <typename ColumnType>
static void TestColumnRoundtrip(const std::shared_ptr<ColumnType> & column, const ClientOptions & client_options)
{
    // Attach column type and client options to any failure reported from this scope.
    SCOPED_TRACE(::testing::Message("Column type: ") << column->GetType().GetName());
    SCOPED_TRACE(::testing::Message("Client options: ") << client_options);

    clickhouse::Client client(client_options);

    const auto skip_reason = CheckIfShouldSkipTest(client);
    if (skip_reason) {
        GTEST_SKIP() << *skip_reason;
    }

    // Convert the untyped result back to the concrete column type before comparing.
    const auto untyped_result = RoundtripColumnValues(client, column);
    const auto typed_result = untyped_result->template AsStrict<ColumnType>();
    EXPECT_TRUE(CompareRecursive(*column, *typed_result));
}


template <typename ColumnType, typename CompressionMethods>
static void TestColumnRoundtrip(const ColumnType & column, const ClientOptions & client_options, CompressionMethods && compression_methods)
{
for (auto compressionMethod : compression_methods)
{
ClientOptions new_options = ClientOptions(client_options).SetCompressionMethod(compressionMethod);
TestColumnRoundtrip(column, new_options);
}
}
};

// Luckily all (non-data copying/moving) constructors have size_t params.
Expand Down Expand Up @@ -184,7 +216,17 @@ using TestCases = ::testing::Types<
DecimalColumnTestCase<ColumnDecimal, 12, 9>,

DecimalColumnTestCase<ColumnDecimal, 6, 0>,
DecimalColumnTestCase<ColumnDecimal, 6, 3>
DecimalColumnTestCase<ColumnDecimal, 6, 3>,

GenericColumnTestCase<ColumnLowCardinalityT<ColumnString>, &makeColumn<ColumnLowCardinalityT<ColumnString>>, std::string, &MakeStrings>

// Array(String)
// GenericColumnTestCase<ColumnArrayT<ColumnString>, &makeColumn<ColumnArrayT<ColumnString>>, std::vector<std::string>, &MakeArrays<std::string, &MakeStrings>>

// // Array(Array(String))
// GenericColumnTestCase<ColumnArrayT<ColumnArrayT<ColumnString>>, &makeColumn<ColumnArrayT<ColumnArrayT<ColumnString>>>,
// std::vector<std::vector<std::string>>,
// &MakeArrays<std::vector<std::string>, &MakeArrays<std::string, &MakeStrings>>>
>;

TYPED_TEST_SUITE(GenericColumnTest, TestCases);
Expand Down Expand Up @@ -222,7 +264,7 @@ TYPED_TEST(GenericColumnTest, EmptyColumn) {

TYPED_TEST(GenericColumnTest, Append) {
auto column = this->MakeColumn();
const auto values = this->GenerateValues(100);
const auto values = this->GenerateValues(10'000);

for (const auto & v : values) {
EXPECT_NO_THROW(column->Append(v));
Expand Down Expand Up @@ -259,10 +301,17 @@ inline auto convertValueForGetItem(const ColumnType& col, ValueType&& t) {
}

TYPED_TEST(GenericColumnTest, GetItem) {
auto [column, values] = this->MakeColumnWithValues(100);
auto [column, values] = this->MakeColumnWithValues(10'000);

ASSERT_EQ(values.size(), column->Size());
ASSERT_EQ(column->GetItem(0).type, column->GetType().GetCode());
const auto wrapping_types = std::set<Type::Code>{
Type::Code::LowCardinality, Type::Code::Array, Type::Code::Nullable
};

// For wrapping types, type of ItemView can be different from type of column
if (wrapping_types.find(column->GetType().GetCode()) == wrapping_types.end() ) {
EXPECT_EQ(column->GetItem(0).type, column->GetType().GetCode());
}

for (size_t i = 0; i < values.size(); ++i) {
const auto v = convertValueForGetItem(*column, values[i]);
Expand All @@ -274,7 +323,7 @@ TYPED_TEST(GenericColumnTest, GetItem) {
}

TYPED_TEST(GenericColumnTest, Slice) {
auto [column, values] = this->MakeColumnWithValues(100);
auto [column, values] = this->MakeColumnWithValues(10'000);

auto untyped_slice = column->Slice(0, column->Size());
auto slice = untyped_slice->template AsStrict<typename TestFixture::ColumnType>();
Expand All @@ -286,7 +335,7 @@ TYPED_TEST(GenericColumnTest, Slice) {
}

TYPED_TEST(GenericColumnTest, CloneEmpty) {
auto [column, values] = this->MakeColumnWithValues(100);
auto [column, values] = this->MakeColumnWithValues(10'000);
EXPECT_EQ(values.size(), column->Size());

auto clone_untyped = column->CloneEmpty();
Expand All @@ -298,15 +347,15 @@ TYPED_TEST(GenericColumnTest, CloneEmpty) {
}

TYPED_TEST(GenericColumnTest, Clear) {
auto [column, values] = this->MakeColumnWithValues(100);
auto [column, values] = this->MakeColumnWithValues(10'000);
EXPECT_EQ(values.size(), column->Size());

column->Clear();
EXPECT_EQ(0u, column->Size());
}

TYPED_TEST(GenericColumnTest, Swap) {
auto [column_A, values] = this->MakeColumnWithValues(100);
auto [column_A, values] = this->MakeColumnWithValues(10'000);
auto column_B = this->MakeColumn();

column_A->Swap(*column_B);
Expand All @@ -318,18 +367,21 @@ TYPED_TEST(GenericColumnTest, Swap) {
TYPED_TEST(GenericColumnTest, LoadAndSave) {
auto [column_A, values] = this->MakeColumnWithValues(100);

char buffer[4096] = {'\0'};
// large buffer since we have pretty big values for String column
auto const BufferSize = 10*1024*1024;
std::unique_ptr<char[]> buffer = std::make_unique<char[]>(BufferSize);
memset(buffer.get(), 0, BufferSize);
{
ArrayOutput output(buffer, sizeof(buffer));
ArrayOutput output(buffer.get(), BufferSize);
// Save
EXPECT_NO_THROW(column_A->Save(&output));
ASSERT_NO_THROW(column_A->Save(&output));
}

auto column_B = this->MakeColumn();
{
ArrayInput input(buffer, sizeof(buffer));
ArrayInput input(buffer.get(), BufferSize);
// Load
EXPECT_TRUE(column_B->Load(&input, values.size()));
ASSERT_TRUE(column_B->Load(&input, values.size()));
}

EXPECT_TRUE(CompareRecursive(*column_A, *column_B));
Expand All @@ -342,25 +394,28 @@ const auto LocalHostEndpoint = ClientOptions()
.SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", ""))
.SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default"));

const auto AllCompressionMethods = {
clickhouse::CompressionMethod::None,
clickhouse::CompressionMethod::LZ4
};

TYPED_TEST(GenericColumnTest, RoundTrip) {
auto [column, values] = this->MakeColumnWithValues(100);
auto [column, values] = this->MakeColumnWithValues(10'000);
EXPECT_EQ(values.size(), column->Size());

clickhouse::Client client(LocalHostEndpoint);

if (auto message = this->SkipTest(client)) {
GTEST_SKIP() << *message;
}

auto result_typed = RoundtripColumnValues(client, column)->template AsStrict<typename TestFixture::ColumnType>();
EXPECT_TRUE(CompareRecursive(*column, *result_typed));
this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods);
}

TYPED_TEST(GenericColumnTest, NulableT_RoundTrip) {
TYPED_TEST(GenericColumnTest, NullableT_RoundTrip) {
using NullableType = ColumnNullableT<typename TestFixture::ColumnType>;

auto column = std::make_shared<NullableType>(this->MakeColumn());
auto values = this->GenerateValues(100);
auto non_nullable_column = this->MakeColumn();
if (non_nullable_column->GetType().GetCode() == Type::Code::LowCardinality)
// TODO (vnemkov): wrap as ColumnLowCardinalityT<ColumnNullableT<NestedColumn>> instead of ColumnNullableT<ColumnLowCardinalityT<NestedColumn>>
GTEST_SKIP() << "Can't have " << non_nullable_column->GetType().GetName() << " in Nullable";

auto column = std::make_shared<NullableType>(std::move(non_nullable_column));
auto values = this->GenerateValues(10'000);

FromVectorGenerator<bool> is_null({true, false});
for (size_t i = 0; i < values.size(); ++i) {
Expand All @@ -371,12 +426,24 @@ TYPED_TEST(GenericColumnTest, NulableT_RoundTrip) {
}
}

clickhouse::Client client(LocalHostEndpoint);
this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods);
}

TYPED_TEST(GenericColumnTest, ArrayT_RoundTrip) {
using ColumnArrayType = ColumnArrayT<typename TestFixture::ColumnType>;

if (auto message = this->SkipTest(client)) {
GTEST_SKIP() << *message;
auto [nested_column, values] = this->MakeColumnWithValues(100);

auto column = std::make_shared<ColumnArrayType>(nested_column->CloneEmpty()->template As<typename TestFixture::ColumnType>());
for (size_t i = 0; i < values.size(); ++i)
{
const std::vector<std::decay_t<decltype(values[0])>> row{values.begin(), values.begin() + i};
column->Append(values.begin(), values.begin() + i);

EXPECT_TRUE(CompareRecursive(row, (*column)[column->Size() - 1]));
}
EXPECT_EQ(values.size(), column->Size());

auto result_typed = WrapColumn<NullableType>(RoundtripColumnValues(client, column));
EXPECT_TRUE(CompareRecursive(*column, *result_typed));
this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods);
}

5 changes: 3 additions & 2 deletions ut/columns_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,10 @@ TEST(ColumnsCase, FixedString_Append_LargeString) {
}

TEST(ColumnsCase, StringInit) {
auto col = std::make_shared<ColumnString>(MakeStrings());
auto values = MakeStrings();
auto col = std::make_shared<ColumnString>(values);

ASSERT_EQ(col->Size(), 4u);
ASSERT_EQ(col->Size(), values.size());
ASSERT_EQ(col->At(1), "ab");
ASSERT_EQ(col->At(3), "abcd");
}
Expand Down
31 changes: 26 additions & 5 deletions ut/roundtrip_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,49 @@
#include <clickhouse/block.h>

#include <gtest/gtest.h>
#include <type_traits>
#include "clickhouse/columns/numeric.h"

namespace {
using namespace clickhouse;

// Builds a vector holding `count` values: `start`, then each following value
// obtained from the previous one with operator++.
// Returns an empty vector when `count` is 0.
template <typename T>
std::vector<T> GenerateConsecutiveNumbers(size_t count, T start = 0)
{
    std::vector<T> numbers;
    numbers.reserve(count);  // final size is known up front

    for (T next = start; numbers.size() < count; ++next)
    {
        numbers.push_back(next);
    }

    return numbers;
}

}


ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) {
// Create a temporary table with a single column
// insert values from `expected`
// select and aggregate all values from block into `result` column
// Create a temporary table with a corresponding data column
// INSERT values from `expected`
// SELECT and collect all values from block into `result` column
auto result = expected->CloneEmpty();

const std::string type_name = result->GetType().GetName();
client.Execute("DROP TEMPORARY TABLE IF EXISTS temporary_roundtrip_table;");
client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (col " + type_name + ");");
// the `id` column lets the SELECT below return rows in the same order they were inserted (ORDER BY id)
client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (id UInt32, col " + type_name + ");");
{
Block block;
block.AppendColumn("col", expected);
block.AppendColumn("id", std::make_shared<ColumnUInt32>(GenerateConsecutiveNumbers<uint32_t>(expected->Size())));
block.RefreshRowCount();
client.Insert("temporary_roundtrip_table", block);
}

client.Select("SELECT col FROM temporary_roundtrip_table", [&result](const Block& b) {
client.Select("SELECT col FROM temporary_roundtrip_table ORDER BY id", [&result](const Block& b) {
if (b.GetRowCount() == 0)
return;

Expand Down
5 changes: 4 additions & 1 deletion ut/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,10 @@ std::ostream& operator<<(std::ostream & ostr, const PrintContainer<T>& print_con
for (auto i = std::begin(container); i != std::end(container); /*intentionally no ++i*/) {
const auto & elem = *i;

if constexpr (is_container_v<std::decay_t<decltype(elem)>>) {
if constexpr (is_string_v<decltype(elem)>) {
ostr << '"' << elem << '"';
}
else if constexpr (is_container_v<std::decay_t<decltype(elem)>>) {
ostr << PrintContainer{elem};
} else {
ostr << elem;
Expand Down
Loading

0 comments on commit e2ac63c

Please sign in to comment.