Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better estimation for ColumnLowCardinality::Reserve and ColumnString::Reserve #346

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
18 changes: 13 additions & 5 deletions clickhouse/columns/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ ColumnRef ColumnArray::CloneEmpty() const {
return std::make_shared<ColumnArray>(data_->CloneEmpty());
}

/// Increases the capacity of the column for large block insertion.
/// The same `new_cap` value is forwarded first to data_ and then to offsets_.
void ColumnArray::Reserve(size_t new_cap) {
data_->Reserve(new_cap);
offsets_->Reserve(new_cap);
}

void ColumnArray::Append(ColumnRef column) {
if (auto col = column->As<ColumnArray>()) {
for (size_t i = 0; i < col->Size(); ++i) {
Expand All @@ -65,6 +60,15 @@ void ColumnArray::Append(ColumnRef column) {
}
}

/// Increases the capacity of the column for large block insertion.
/// The same `new_cap` value is forwarded first to data_ and then to offsets_.
void ColumnArray::Reserve(size_t new_cap) {
data_->Reserve(new_cap);
offsets_->Reserve(new_cap);
}

size_t ColumnArray::Capacity() const {
return data_->Capacity();
}

bool ColumnArray::LoadPrefix(InputStream* input, size_t rows) {
if (!rows) {
return true;
Expand Down Expand Up @@ -110,6 +114,10 @@ size_t ColumnArray::Size() const {
return offsets_->Size();
}

/// Estimated RAM usage by the column in bytes: the sum of the values reported
/// by offsets_ and data_.
size_t ColumnArray::MemoryUsage() const {
return offsets_->MemoryUsage() + data_->MemoryUsage();
}

void ColumnArray::Swap(Column& other) {
auto & col = dynamic_cast<ColumnArray &>(other);
data_.swap(col.data_);
Expand Down
8 changes: 5 additions & 3 deletions clickhouse/columns/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ class ColumnArray : public Column {
}

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
size_t Capacity() const override;

/// Loads column prefix from input stream.
bool LoadPrefix(InputStream* input, size_t rows) override;
Expand All @@ -71,6 +71,8 @@ class ColumnArray : public Column {
/// Returns count of rows in the column.
size_t Size() const override;

size_t MemoryUsage() const override;

/// Makes slice of the current column.
ColumnRef Slice(size_t, size_t) const override;
ColumnRef CloneEmpty() const override;
Expand Down
4 changes: 4 additions & 0 deletions clickhouse/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class Column : public std::enable_shared_from_this<Column> {

/// Increase the capacity of the column for large block insertion.
virtual void Reserve(size_t new_cap) = 0;
virtual size_t Capacity() const = 0;

/// Template method to load column data from input stream. It'll call LoadPrefix and LoadBody.
/// Should be called only once from the client. Derived classes should not call it.
Expand Down Expand Up @@ -90,6 +91,9 @@ class Column : public std::enable_shared_from_this<Column> {

virtual void Swap(Column&) = 0;

/// Estimated RAM usage by the column in bytes.
virtual size_t MemoryUsage() const = 0;

/// Get a view on raw item data if it is supported by column, will throw an exception if index is out of range.
/// Please note that view is invalidated once column items are added or deleted, column is loaded from stream or destroyed.
virtual ItemView GetItem(size_t) const {
Expand Down
31 changes: 25 additions & 6 deletions clickhouse/columns/date.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ uint16_t ColumnDate::RawAt(size_t n) const {
return data_->At(n);
}

/// Get raw vector contents: returns the mutable reference handed out by
/// data_->GetWritableData(), so callers can modify the stored uint16_t values
/// in place.
std::vector<uint16_t>& ColumnDate::GetWritableData() {
return data_->GetWritableData();
}

/// Appends the content of `column` to the end of this column.
/// If the As<ColumnDate>() cast yields nothing, the call is a no-op.
void ColumnDate::Append(ColumnRef column) {
    const auto other = column->As<ColumnDate>();
    if (!other) {
        return;
    }
    data_->Append(other->data_);
}

/// Get raw vector contents: returns the mutable reference handed out by
/// data_->GetWritableData(), so callers can modify the stored uint16_t values
/// in place.
std::vector<uint16_t>& ColumnDate::GetWritableData() {
return data_->GetWritableData();
}

/// Increases the capacity of the column for large block insertion by
/// forwarding `new_cap` to data_.
void ColumnDate::Reserve(size_t new_cap) {
data_->Reserve(new_cap);
}
Expand All @@ -67,6 +67,10 @@ size_t ColumnDate::Size() const {
return data_->Size();
}

/// Estimated RAM usage by the column in bytes, as reported by data_.
size_t ColumnDate::MemoryUsage() const {
return data_->MemoryUsage();
}

ColumnRef ColumnDate::Slice(size_t begin, size_t len) const {
auto col = data_->Slice(begin, len)->As<ColumnUInt16>();
auto result = std::make_shared<ColumnDate>();
Expand Down Expand Up @@ -154,6 +158,10 @@ size_t ColumnDate32::Size() const {
return data_->Size();
}

/// Estimated RAM usage by the column in bytes, as reported by data_.
size_t ColumnDate32::MemoryUsage() const {
return data_->MemoryUsage();
}

ColumnRef ColumnDate32::Slice(size_t begin, size_t len) const {
auto col = data_->Slice(begin, len)->As<ColumnInt32>();
auto result = std::make_shared<ColumnDate32>();
Expand Down Expand Up @@ -244,6 +252,10 @@ size_t ColumnDateTime::Size() const {
return data_->Size();
}

/// Estimated RAM usage by the column in bytes, as reported by data_.
size_t ColumnDateTime::MemoryUsage() const {
return data_->MemoryUsage();
}

/// Clears the column by calling Clear() on data_.
void ColumnDateTime::Clear() {
data_->Clear();
}
Expand Down Expand Up @@ -303,11 +315,14 @@ std::string ColumnDateTime64::Timezone() const {
return type_->As<DateTime64Type>()->Timezone();
}

void ColumnDateTime64::Reserve(size_t new_cap)
{
/// Increases the capacity of the column for large block insertion by
/// forwarding `new_cap` to data_.
void ColumnDateTime64::Reserve(size_t new_cap) {
data_->Reserve(new_cap);
}

/// Returns the capacity of the column, as reported by data_.
size_t ColumnDateTime64::Capacity() const {
return data_->Capacity();
}

void ColumnDateTime64::Append(ColumnRef column) {
if (auto col = column->As<ColumnDateTime64>()) {
data_->Append(col->data_);
Expand All @@ -330,6 +345,10 @@ size_t ColumnDateTime64::Size() const {
return data_->Size();
}

/// Estimated RAM usage by the column in bytes, as reported by data_.
size_t ColumnDateTime64::MemoryUsage() const {
return data_->MemoryUsage();
}

/// Returns a view on the raw item at `index`: builds an ItemView tagged with
/// Type::DateTime64 from the item that data_ returns for the same index.
ItemView ColumnDateTime64::GetItem(size_t index) const {
return ItemView(Type::DateTime64, data_->GetItem(index));
}
Expand Down
38 changes: 19 additions & 19 deletions clickhouse/columns/date.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@ class ColumnDate : public Column {
/// Do append data as is -- number of day in Unix epoch, no conversions performed.
void AppendRaw(uint16_t value);
uint16_t RawAt(size_t n) const;
/// Get Raw Vector Contents
std::vector<uint16_t>& GetWritableData();

public:
/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

/// Get Raw Vector Contents
std::vector<uint16_t>& GetWritableData();

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Returns the capacity of the column
size_t Capacity() const;
size_t Capacity() const override;

/// Loads column data from input stream.
bool LoadBody(InputStream* input, size_t rows) override;
Expand All @@ -51,6 +51,7 @@ class ColumnDate : public Column {

/// Returns count of rows in the column.
size_t Size() const override;
size_t MemoryUsage() const override;

/// Makes slice of the current column.
ColumnRef Slice(size_t begin, size_t len) const override;
Expand Down Expand Up @@ -88,15 +89,13 @@ class ColumnDate32 : public Column {
/// Get Raw Vector Contents
std::vector<int32_t>& GetWritableData();

/// Returns the capacity of the column
size_t Capacity() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
/// Returns the capacity of the column
size_t Capacity() const override;

/// Loads column data from input stream.
bool LoadBody(InputStream* input, size_t rows) override;
Expand All @@ -109,6 +108,7 @@ class ColumnDate32 : public Column {

/// Returns count of rows in the column.
size_t Size() const override;
size_t MemoryUsage() const override;

/// Makes slice of the current column.
ColumnRef Slice(size_t begin, size_t len) const override;
Expand Down Expand Up @@ -149,15 +149,13 @@ class ColumnDateTime : public Column {
/// Get Raw Vector Contents
std::vector<uint32_t>& GetWritableData();

/// Returns the capacity of the column
size_t Capacity() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
/// Returns the capacity of the column
size_t Capacity() const override;

/// Loads column data from input stream.
bool LoadBody(InputStream* input, size_t rows) override;
Expand All @@ -170,6 +168,7 @@ class ColumnDateTime : public Column {

/// Returns count of rows in the column.
size_t Size() const override;
size_t MemoryUsage() const override;

/// Makes slice of the current column.
ColumnRef Slice(size_t begin, size_t len) const override;
Expand Down Expand Up @@ -206,11 +205,11 @@ class ColumnDateTime64 : public Column {
std::string Timezone() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
size_t Capacity() const override;

/// Loads column data from input stream.
bool LoadBody(InputStream* input, size_t rows) override;
Expand All @@ -223,6 +222,7 @@ class ColumnDateTime64 : public Column {

/// Returns count of rows in the column.
size_t Size() const override;
size_t MemoryUsage() const override;

/// Makes slice of the current column.
ColumnRef Slice(size_t begin, size_t len) const override;
Expand Down
23 changes: 20 additions & 3 deletions clickhouse/columns/decimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,26 @@ Int128 ColumnDecimal::At(size_t i) const {
}
}

/// Appends the content of `column` to the end of this column.
/// If the As<ColumnDecimal>() cast yields nothing, the call is a no-op.
void ColumnDecimal::Append(ColumnRef column) {
    const auto other = column->As<ColumnDecimal>();
    if (!other) {
        return;
    }
    data_->Append(other->data_);
}

/// Increases the capacity of the column for large block insertion by
/// forwarding `new_cap` to data_.
void ColumnDecimal::Reserve(size_t new_cap) {
data_->Reserve(new_cap);
}

void ColumnDecimal::Append(ColumnRef column) {
if (auto col = column->As<ColumnDecimal>()) {
data_->Append(col->data_);
/// Returns the capacity of the column, as reported by data_.
///
/// Capacity() is a virtual member of Column, so the call dispatches on the
/// dynamic type of data_ exactly like Reserve(), Size() and MemoryUsage() do.
/// No switch over the storage width or downcast to a concrete integer column
/// is required, and the "invalid data_ column type" error path disappears
/// because every Column implements Capacity().
size_t ColumnDecimal::Capacity() const {
    return data_->Capacity();
}

Expand All @@ -217,6 +230,10 @@ size_t ColumnDecimal::Size() const {
return data_->Size();
}

/// Estimated RAM usage by the column in bytes, as reported by data_.
size_t ColumnDecimal::MemoryUsage() const {
return data_->MemoryUsage();
}

ColumnRef ColumnDecimal::Slice(size_t begin, size_t len) const {
// couldn't use std::make_shared since this c-tor is private
return ColumnRef{new ColumnDecimal(type_, data_->Slice(begin, len))};
Expand Down
4 changes: 3 additions & 1 deletion clickhouse/columns/decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ class ColumnDecimal : public Column {

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
void Append(ColumnRef column) override;
void Reserve(size_t new_cap) override;
size_t Capacity() const override;
bool LoadBody(InputStream* input, size_t rows) override;
void SaveBody(OutputStream* output) override;
void Clear() override;
size_t Size() const override;
size_t MemoryUsage() const override;
ColumnRef Slice(size_t begin, size_t len) const override;
ColumnRef CloneEmpty() const override;
void Swap(Column& other) override;
Expand Down
20 changes: 16 additions & 4 deletions clickhouse/columns/enum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,6 @@ void ColumnEnum<T>::SetNameAt(size_t n, const std::string& name) {
data_.at(n) = static_cast<T>(type_->As<EnumType>()->GetEnumValue(name));
}

/// Increases the capacity of the column for large block insertion by calling
/// reserve(new_cap) on the value container data_.
template<typename T>
void ColumnEnum<T>::Reserve(size_t new_cap) {
data_.reserve(new_cap);
}

template <typename T>
void ColumnEnum<T>::Append(ColumnRef column) {
Expand All @@ -80,6 +76,17 @@ void ColumnEnum<T>::Append(ColumnRef column) {
}
}

/// Increases the capacity of the column for large block insertion by calling
/// reserve(new_cap) on the value container data_.
template<typename T>
void ColumnEnum<T>::Reserve(size_t new_cap) {
data_.reserve(new_cap);
}

/// Returns the capacity of the column, i.e. the value of data_.capacity().
template<typename T>
size_t ColumnEnum<T>::Capacity() const {
return data_.capacity();
}


template <typename T>
bool ColumnEnum<T>::LoadBody(InputStream* input, size_t rows) {
data_.resize(rows);
Expand All @@ -96,6 +103,11 @@ size_t ColumnEnum<T>::Size() const {
return data_.size();
}

/// Estimated RAM usage by the column in bytes: the number of element slots
/// data_ has allocated (capacity, not size) times the size of one stored value.
template <typename T>
size_t ColumnEnum<T>::MemoryUsage() const {
    const size_t allocated_slots = data_.capacity();
    return allocated_slots * sizeof(T);
}

template <typename T>
ColumnRef ColumnEnum<T>::Slice(size_t begin, size_t len) const {
return std::make_shared<ColumnEnum<T>>(type_, SliceVector(data_, begin, len));
Expand Down
7 changes: 4 additions & 3 deletions clickhouse/columns/enum.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ class ColumnEnum : public Column {
void SetNameAt(size_t n, const std::string& name);

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
size_t Capacity() const override;

/// Loads column data from input stream.
bool LoadBody(InputStream* input, size_t rows) override;
Expand All @@ -47,6 +47,7 @@ class ColumnEnum : public Column {

/// Returns count of rows in the column.
size_t Size() const override;
size_t MemoryUsage() const override;

/// Makes slice of the current column.
ColumnRef Slice(size_t begin, size_t len) const override;
Expand Down
Loading
Loading