Skip to content

Commit cae657a

Browse files
authored
Merge pull request #427 from Enmk/block_clear
Block API minor improvements
2 parents 38dd15d + d0c6abc commit cae657a

File tree

9 files changed

+455
-8
lines changed

9 files changed

+455
-8
lines changed

clickhouse/block.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ size_t Block::GetRowCount() const {
8181
return rows_;
8282
}
8383

84-
size_t Block::RefreshRowCount()
85-
{
84+
size_t Block::RefreshRowCount() {
8685
size_t rows = 0UL;
8786

8887
for (size_t idx = 0UL; idx < columns_.size(); ++idx)
@@ -100,6 +99,22 @@ size_t Block::RefreshRowCount()
10099
return rows_;
101100
}
102101

102+
void Block::Clear() {
103+
for (auto & c : columns_) {
104+
c.column->Clear();
105+
}
106+
107+
RefreshRowCount();
108+
}
109+
110+
void Block::Reserve(size_t new_cap) {
111+
for (auto & c : columns_) {
112+
c.column->Reserve(new_cap);
113+
}
114+
}
115+
116+
117+
103118
ColumnRef Block::operator [] (size_t idx) const {
104119
if (idx < columns_.size()) {
105120
return columns_[idx].column;

clickhouse/block.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ class Block {
8585
return columns_.at(idx).name;
8686
}
8787

88+
/// Convenience method to wipe out all rows from all columns
89+
void Clear();
90+
91+
/// Convenience method to call Reserve() on all columns
92+
void Reserve(size_t new_cap);
93+
8894
/// Reference to column by index in the block.
8995
ColumnRef operator [] (size_t idx) const;
9096

clickhouse/columns/itemview.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ struct ItemView {
6565
using ValueType = std::remove_cv_t<std::decay_t<T>>;
6666
if constexpr (std::is_same_v<std::string_view, ValueType> || std::is_same_v<std::string, ValueType>) {
6767
return data;
68-
} else if constexpr (std::is_fundamental_v<ValueType> || std::is_same_v<Int128, ValueType>) {
68+
} else if constexpr (std::is_fundamental_v<ValueType> || std::is_same_v<Int128, ValueType> || std::is_same_v<UInt128, ValueType>) {
6969
if (sizeof(ValueType) == data.size()) {
7070
return *reinterpret_cast<const T*>(data.data());
7171
} else {

clickhouse/columns/lowcardinality.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <functional>
1010
#include <string_view>
1111
#include <type_traits>
12+
#include <cmath>
1213

1314
#include <cassert>
1415

@@ -175,8 +176,11 @@ ColumnLowCardinality::~ColumnLowCardinality()
175176
{}
176177

177178
void ColumnLowCardinality::Reserve(size_t new_cap) {
178-
dictionary_column_->Reserve(new_cap);
179-
index_column_->Reserve(new_cap);
179+
// Assumption is that dictionary must be smaller than index.
180+
// NOTE(vnemkov): Formula below (`ceil(sqrt(x))`) is a gut-feeling-good-enough estimation,
181+
// feel free to replace/adjust if you have a better one supported by actual data.
182+
dictionary_column_->Reserve(static_cast<size_t>(ceil(sqrt(static_cast<double>(new_cap)))));
183+
index_column_->Reserve(new_cap + 2); // + 1 for null item (at pos 0), + 1 for default item (at pos 1)
180184
}
181185

182186
void ColumnLowCardinality::Setup(ColumnRef dictionary_column) {

ut/block_ut.cpp

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,37 @@
11
#include <clickhouse/client.h>
2-
#include "readonly_client_test.h"
3-
#include "connection_failed_client_test.h"
2+
#include <clickhouse/columns/tuple.h>
3+
#include <clickhouse/types/types.h>
4+
5+
#include "clickhouse/columns/column.h"
6+
#include "clickhouse/columns/lowcardinality.h"
7+
#include "gtest/gtest-message.h"
8+
9+
#include "ut/utils_comparison.h"
410
#include "utils.h"
511

612
#include <gtest/gtest.h>
13+
#include <memory>
714

815
namespace {
916
using namespace clickhouse;
1017

1118
Block MakeBlock(std::vector<std::pair<std::string, ColumnRef>> columns) {
1219
Block result;
1320

21+
const size_t number_of_rows = columns.size() ? columns[0].second->Size() : 0;
22+
size_t i = 0;
1423
for (const auto & name_and_col : columns) {
24+
EXPECT_EQ(number_of_rows, name_and_col.second->Size())
25+
<< "Column #" << i << " " << name_and_col.first << " has incorrect number of rows";
26+
1527
result.AppendColumn(name_and_col.first, name_and_col.second);
28+
29+
++i;
1630
}
1731

1832
result.RefreshRowCount();
33+
EXPECT_EQ(number_of_rows, result.GetRowCount());
34+
1935
return result;
2036
}
2137

@@ -83,3 +99,83 @@ TEST(BlockTest, Iterators) {
8399
ASSERT_NE(block.cbegin(), block.cend());
84100
}
85101

102+
TEST(BlockTest, Clear) {
103+
// Test that Block::Clear removes all rows from all of the columns,
104+
// without changing column instances, types, names, etc.
105+
106+
auto block = MakeBlock({
107+
{"foo", std::make_shared<ColumnUInt8>(std::vector<uint8_t>{1, 2, 3, 4, 5})},
108+
{"bar", std::make_shared<ColumnString>(std::vector<std::string>{"1", "2", "3", "4", "5"})},
109+
});
110+
111+
std::vector<std::tuple<std::string, Column*>> expected_columns_description;
112+
for (const auto & c : block) {
113+
expected_columns_description.emplace_back(c.Name(), c.Column().get());
114+
}
115+
116+
block.Clear();
117+
118+
// Block must report empty after being cleared
119+
EXPECT_EQ(0u, block.GetRowCount());
120+
121+
size_t i = 0;
122+
for (const auto & c : block) {
123+
const auto & [expected_name, expected_column] = expected_columns_description[i];
124+
SCOPED_TRACE(testing::Message("col #") << c.ColumnIndex() << " \"" << c.Name() << "\"");
125+
126+
// MUST be same column object
127+
EXPECT_EQ(expected_column, c.Column().get());
128+
129+
// MUST have same column name
130+
EXPECT_EQ(expected_name, c.Name());
131+
132+
// column MUST be empty
133+
EXPECT_EQ(0u, c.Column()->Size());
134+
135+
++i;
136+
}
137+
}
138+
139+
TEST(BlockTest, Reserve) {
140+
// Test that Block::Reserve reserves space in all columns (uncheckable now),
141+
// without changing column instances, names, and previously stored rows.
142+
143+
auto block = MakeBlock({
144+
{"foo", std::make_shared<ColumnUInt8>(std::vector<uint8_t>{1, 2, 3, 4, 5})},
145+
{"bar", std::make_shared<ColumnString>(std::vector<std::string>{"1", "2", "3", "4", "5"})},
146+
{"quix", std::make_shared<ColumnLowCardinalityT<ColumnString>>(std::vector<std::string>{"1", "2", "3", "4", "5"})},
147+
});
148+
149+
const size_t initial_rows_count = block.GetRowCount();
150+
151+
std::vector<std::tuple<std::string, Column*, ColumnRef>> expected_columns_description;
152+
for (const auto & c : block) {
153+
expected_columns_description.emplace_back(
154+
c.Name(),
155+
c.Column().get(), // reference to the actual object
156+
c.Column()->Slice(0, c.Column()->Size()) // reference to the values
157+
);
158+
}
159+
160+
block.Reserve(1000); // 1000 is arbitrary value
161+
162+
// Block must have the same number of rows as before Reserve
163+
EXPECT_EQ(initial_rows_count, block.GetRowCount());
164+
165+
size_t i = 0;
166+
for (const auto & c : block) {
167+
const auto & [expected_name, expected_column, expected_values] = expected_columns_description[i];
168+
SCOPED_TRACE(testing::Message("col #") << c.ColumnIndex() << " \"" << c.Name() << "\"");
169+
170+
// MUST have same column name
171+
EXPECT_EQ(expected_name, c.Name());
172+
173+
// MUST be same column object
174+
EXPECT_EQ(expected_column, c.Column().get());
175+
176+
// column MUST have the same values
177+
EXPECT_TRUE(CompareRecursive(*expected_values, *c.Column()));
178+
179+
++i;
180+
}
181+
}

ut/utils.cpp

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@
1919
#include <clickhouse/base/socket.h> // for ipv4-ipv6 platform-specific stuff
2020

2121
#include <cinttypes>
22+
#include <cstdint>
23+
#include <ctime>
2224
#include <iomanip>
2325
#include <sstream>
26+
#include <stdexcept>
2427
#include <type_traits>
28+
#include "clickhouse/types/types.h"
29+
#include "absl/numeric/int128.h"
30+
2531

2632

2733
namespace {
@@ -42,7 +48,7 @@ struct DateTimeValue {
4248
};
4349

4450
std::ostream& operator<<(std::ostream & ostr, const DateTimeValue & time) {
45-
const auto t = std::localtime(&time.value);
51+
const auto t = std::gmtime(&time.value);
4652
char buffer[] = "2015-05-18 07:40:12\0\0";
4753
std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", t);
4854

@@ -173,6 +179,7 @@ std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostre
173179
|| doPrintValue<ColumnEnum8>(c, row, ostr)
174180
|| doPrintValue<ColumnEnum16>(c, row, ostr)
175181
|| doPrintValue<ColumnDate, DateTimeValue>(c, row, ostr)
182+
|| doPrintValue<ColumnDate32, DateTimeValue>(c, row, ostr)
176183
|| doPrintValue<ColumnDateTime, DateTimeValue>(c, row, ostr)
177184
|| doPrintValue<ColumnDateTime64, DateTimeValue>(c, row, ostr)
178185
|| doPrintValue<ColumnDecimal>(c, row, ostr)
@@ -332,6 +339,141 @@ std::ostream & operator<<(std::ostream & ostr, const Progress & progress) {
332339
<< " written_bytes : " << progress.written_bytes;
333340
}
334341

342+
std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) {
343+
ostr << "ItemView {" << clickhouse::Type::TypeName(item_view.type) << " : ";
344+
345+
switch (item_view.type) {
346+
case Type::Void:
347+
ostr << "--void--";
348+
break;
349+
case Type::Int8:
350+
ostr << static_cast<int>(item_view.get<int8_t>());
351+
break;
352+
case Type::Int16:
353+
ostr << static_cast<int>(item_view.get<int16_t>());
354+
break;
355+
case Type::Int32:
356+
ostr << static_cast<int>(item_view.get<int32_t>());
357+
break;
358+
case Type::Int64:
359+
ostr << item_view.get<int64_t>();
360+
break;
361+
case Type::UInt8:
362+
ostr << static_cast<unsigned int>(item_view.get<uint8_t>());
363+
break;
364+
case Type::UInt16:
365+
ostr << static_cast<unsigned int>(item_view.get<uint16_t>());
366+
break;
367+
case Type::UInt32:
368+
ostr << static_cast<unsigned int>(item_view.get<uint32_t>());
369+
break;
370+
case Type::UInt64:
371+
ostr << item_view.get<uint64_t>();
372+
break;
373+
case Type::Float32:
374+
ostr << static_cast<float>(item_view.get<float>());
375+
break;
376+
case Type::Float64:
377+
ostr << static_cast<double>(item_view.get<double>());
378+
break;
379+
case Type::String:
380+
case Type::FixedString:
381+
ostr << "\"" << item_view.data << "\" (" << item_view.data.size() << " bytes)";
382+
break;
383+
case Type::Date:
384+
ostr << DateTimeValue(item_view.get<uint16_t>() * 86400);
385+
break;
386+
case Type::Date32:
387+
ostr << DateTimeValue(item_view.get<int32_t>());
388+
break;
389+
case Type::DateTime:
390+
ostr << DateTimeValue(item_view.get<uint32_t>());
391+
break;
392+
case Type::DateTime64: {
393+
if (item_view.data.size() == sizeof(int32_t)) {
394+
ostr << DateTimeValue(item_view.get<int32_t>());
395+
}
396+
else if (item_view.data.size() == sizeof(int64_t)) {
397+
ostr << DateTimeValue(item_view.get<int64_t>());
398+
}
399+
else if (item_view.data.size() == sizeof(Int128)) {
400+
ostr << DateTimeValue(item_view.get<Int128>());
401+
}
402+
else {
403+
throw std::runtime_error("Invalid data size of ItemView of type DateTime64");
404+
}
405+
break;
406+
}
407+
case Type::Enum8:
408+
ostr << static_cast<int>(item_view.get<int8_t>());
409+
break;
410+
case Type::Enum16:
411+
ostr << static_cast<int>(item_view.get<int16_t>());
412+
break;
413+
case Type::UUID: {
414+
const auto & uuid_vals = reinterpret_cast<const uint64_t*>(item_view.data.data());
415+
ostr << ToString(clickhouse::UUID{uuid_vals[0], uuid_vals[1]});
416+
break;
417+
}
418+
case Type::IPv4: {
419+
in_addr addr;
420+
addr.s_addr = ntohl(item_view.get<uint32_t>());
421+
ostr << addr;
422+
break;
423+
}
424+
case Type::IPv6:
425+
ostr << *reinterpret_cast<const in6_addr*>(item_view.AsBinaryData().data());
426+
break;
427+
case Type::Int128:
428+
ostr << item_view.get<Int128>();
429+
break;
430+
case Type::UInt128:
431+
ostr << item_view.get<UInt128>();
432+
break;
433+
case Type::Decimal: {
434+
if (item_view.data.size() == sizeof(int32_t)) {
435+
ostr << item_view.get<int32_t>();
436+
}
437+
else if (item_view.data.size() == sizeof(int64_t)) {
438+
ostr << item_view.get<int64_t>();
439+
}
440+
else if (item_view.data.size() == sizeof(Int128)) {
441+
ostr << item_view.get<Int128>();
442+
}
443+
else {
444+
throw std::runtime_error("Invalid data size of ItemView of type Decimal");
445+
}
446+
}
447+
break;
448+
case Type::Decimal32:
449+
ostr << DateTimeValue(item_view.get<int32_t>());
450+
break;
451+
case Type::Decimal64:
452+
ostr << DateTimeValue(item_view.get<int64_t>());
453+
break;
454+
case Type::Decimal128:
455+
ostr << DateTimeValue(item_view.get<Int128>());
456+
break;
457+
// Unsupported types, i.e. there shouldn't be `ItemView`s of those types in practice,
458+
// either because GetItem() is not implemented for corresponding column type
459+
// OR this type code is never used for `ItemView`s (although the type code of the wrapped column is).
460+
case Type::LowCardinality:
461+
case Type::Array:
462+
case Type::Nullable:
463+
case Type::Tuple:
464+
case Type::Map:
465+
case Type::Point:
466+
case Type::Ring:
467+
case Type::Polygon:
468+
case Type::MultiPolygon: {
469+
throw std::runtime_error("Invalid data size of ItemView of type " + std::string(Type::TypeName(item_view.type)));
470+
}
471+
};
472+
473+
return ostr << "}";
474+
}
475+
476+
335477
}
336478

337479
uint64_t versionNumber(const ServerInfo & server_info) {

ut/utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <clickhouse/base/platform.h>
44
#include <clickhouse/base/uuid.h>
55

6+
#include "clickhouse/columns/itemview.h"
67
#include "clickhouse/query.h"
78
#include "utils_meta.h"
89
#include "utils_comparison.h"
@@ -144,6 +145,7 @@ std::ostream& operator<<(std::ostream & ostr, const Type & type);
144145
std::ostream & operator<<(std::ostream & ostr, const ServerInfo & server_info);
145146
std::ostream & operator<<(std::ostream & ostr, const Profile & profile);
146147
std::ostream & operator<<(std::ostream & ostr, const Progress & progress);
148+
std::ostream & operator<<(std::ostream& ostr, const ItemView& item_view);
147149
}
148150

149151
std::ostream& operator<<(std::ostream & ostr, const PrettyPrintBlock & block);

0 commit comments

Comments
 (0)