Skip to content

Commit fb52bca

Browse files
mbasmanova authored and facebook-github-bot committed
feat: Upgrade variant-to-vector conversion capability (facebookincubator#14011)
Summary: Pull Request resolved: facebookincubator#14011 There used to be only a limited API for converting a variant array to a vector. Now any variant can be converted to a vector. This code comes from an existing closed-source Velox application. Reviewed By: kKPulla Differential Revision: D77762167 fbshipit-source-id: 1b976d328ab468ec3e21f7b45d91f7a8b42fb13b
1 parent c3c8008 commit fb52bca

File tree

4 files changed

+398
-171
lines changed

4 files changed

+398
-171
lines changed

velox/parse/Expressions.cpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -265,10 +265,8 @@ TypedExprPtr Expressions::inferTypes(
265265
constantVector =
266266
BaseVector::createNullConstant(constant->type(), 1, pool);
267267
} else {
268-
auto arrayVector = variantArrayToVector(
269-
constant->type(), constant->value().array(), pool);
270-
constantVector = std::make_shared<ConstantVector<velox::ComplexType>>(
271-
pool, 1, 0, arrayVector);
268+
constantVector =
269+
variantToVector(constant->type(), constant->value(), pool);
272270
}
273271
return std::make_shared<ConstantTypedExpr>(constantVector);
274272
}

velox/vector/VariantToVector.cpp

Lines changed: 232 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -15,66 +15,252 @@
1515
*/
1616

1717
#include "velox/vector/VariantToVector.h"
18+
#include "velox/vector/ComplexVector.h"
1819
#include "velox/vector/FlatVector.h"
1920

20-
namespace facebook::velox::core {
21+
namespace facebook::velox {
2122
namespace {
2223

24+
VectorPtr callMakeVector(
25+
TypePtr type,
26+
const std::vector<Variant>& data,
27+
memory::MemoryPool* pool);
28+
29+
template <TypeKind KIND, typename = void>
30+
struct VariantToVector {
31+
static VectorPtr makeVector(
32+
TypePtr type,
33+
const std::vector<Variant>& /*data*/,
34+
memory::MemoryPool* /*pool*/) {
35+
VELOX_NYI("Type not supported: {}", type->toString());
36+
}
37+
};
38+
39+
template <>
40+
struct VariantToVector<TypeKind::HUGEINT> {
41+
static VectorPtr makeVector(
42+
TypePtr type,
43+
const std::vector<Variant>& /*data*/,
44+
memory::MemoryPool* /*pool*/) {
45+
VELOX_NYI("Type not supported: {}", type->toString());
46+
}
47+
};
48+
2349
template <TypeKind KIND>
24-
ArrayVectorPtr variantArrayToVectorImpl(
25-
const TypePtr& arrayType,
26-
const std::vector<Variant>& variantArray,
27-
velox::memory::MemoryPool* pool) {
28-
using T = typename TypeTraits<KIND>::NativeType;
29-
30-
// First generate internal arrayVector elements.
31-
const size_t variantArraySize = variantArray.size();
32-
33-
// Create array elements flat vector.
34-
auto arrayElements = BaseVector::create<FlatVector<T>>(
35-
arrayType->childAt(0), variantArraySize, pool);
36-
37-
// Populate internal array elements (flat vector).
38-
for (vector_size_t i = 0; i < variantArraySize; i++) {
39-
const auto& value = variantArray[i];
40-
if (!value.isNull()) {
41-
// `getOwnedValue` copies the content to its internal buffers (in case of
42-
// string/StringView); no-op for other primitive types.
43-
arrayElements->set(i, T(value.value<KIND>()));
44-
} else {
45-
arrayElements->setNull(i, true);
50+
struct VariantToVector<
51+
KIND,
52+
std::enable_if_t<
53+
TypeTraits<KIND>::isFixedWidth || KIND == TypeKind::VARCHAR ||
54+
KIND == TypeKind::VARBINARY || KIND == TypeKind::OPAQUE,
55+
void>> {
56+
static constexpr bool kIsOpaque = (KIND == TypeKind::OPAQUE);
57+
static VectorPtr makeVector(
58+
TypePtr type,
59+
const std::vector<Variant>& data,
60+
memory::MemoryPool* pool) {
61+
using T = typename TypeTraits<KIND>::NativeType;
62+
63+
// Allocate nulls and data buffers and set all values to null by default.
64+
const vector_size_t dataSize = data.size();
65+
BufferPtr valuesBuffer = AlignedBuffer::allocate<T>(dataSize, pool);
66+
BufferPtr nulls = allocateNulls(dataSize, pool, bits::kNull);
67+
68+
// Create flat vector to store all the values.
69+
auto values = std::make_shared<FlatVector<T>>(
70+
pool,
71+
type,
72+
nulls,
73+
dataSize,
74+
std::move(valuesBuffer),
75+
std::vector<BufferPtr>());
76+
77+
// Populate data into flat vector.
78+
for (size_t i = 0; i < dataSize; i++) {
79+
if (!data[i].isNull()) {
80+
if constexpr (kIsOpaque) {
81+
values->set(i, T(data[i].value<KIND>().obj));
82+
} else {
83+
values->set(i, T(data[i].value<KIND>()));
84+
}
85+
}
4686
}
87+
return values;
4788
}
89+
};
4890

49-
// Create ArrayVector around the FlatVector containing array elements.
50-
BufferPtr offsets = allocateOffsets(1, pool);
51-
BufferPtr sizes = allocateSizes(1, pool);
91+
template <>
92+
struct VariantToVector<TypeKind::ARRAY> {
93+
static VectorPtr makeVector(
94+
TypePtr type,
95+
const std::vector<Variant>& data,
96+
memory::MemoryPool* pool) {
97+
// Create offsets, sizes and nulls buffers.
98+
vector_size_t size = data.size();
99+
BufferPtr offsets = allocateOffsets(size, pool);
100+
BufferPtr sizes = allocateSizes(size, pool);
101+
BufferPtr nulls = allocateNulls(size, pool);
102+
auto rawOffsets = offsets->asMutable<vector_size_t>();
103+
auto rawSizes = sizes->asMutable<vector_size_t>();
104+
auto rawNulls = nulls->asMutable<uint64_t>();
52105

53-
auto rawSizes = sizes->asMutable<vector_size_t>();
54-
rawSizes[0] = variantArraySize;
106+
// Iterate through array values and set values in offsets and sizes buffers.
107+
// Create container for underlying array elements to create an elements
108+
// vector.
109+
std::vector<Variant> elements;
110+
vector_size_t index = 0;
111+
vector_size_t nullCount = 0;
112+
for (size_t i = 0; i < data.size(); ++i) {
113+
auto isNull = data[i].isNull();
114+
*rawOffsets++ = index;
115+
*rawSizes++ = !isNull ? data[i].array().size() : 0;
116+
if (isNull) {
117+
++nullCount;
118+
bits::setNull(rawNulls, i, true);
119+
continue;
120+
}
121+
for (const auto& arrayElement : data[i].array()) {
122+
elements.push_back(arrayElement);
123+
++index;
124+
}
125+
}
126+
127+
// Create child elements vector with all the array values.
128+
TypePtr elementType = type->childAt(0);
129+
auto elementsVector = callMakeVector(elementType, elements, pool);
130+
131+
return std::make_shared<ArrayVector>(
132+
pool,
133+
type,
134+
nulls,
135+
size,
136+
offsets,
137+
sizes,
138+
std::move(elementsVector),
139+
nullCount);
140+
}
141+
};
142+
143+
template <>
144+
struct VariantToVector<TypeKind::MAP> {
145+
static VectorPtr makeVector(
146+
TypePtr type,
147+
const std::vector<Variant>& data,
148+
memory::MemoryPool* pool) {
149+
// Create offsets, sizes and nulls buffers.
150+
vector_size_t size = data.size();
151+
BufferPtr offsets = allocateOffsets(size, pool);
152+
BufferPtr sizes = allocateSizes(size, pool);
153+
BufferPtr nulls = allocateNulls(size, pool);
154+
auto rawOffsets = offsets->asMutable<vector_size_t>();
155+
auto rawSizes = sizes->asMutable<vector_size_t>();
156+
auto rawNulls = nulls->asMutable<uint64_t>();
157+
158+
// Iterate through map (key,value) pairs and set offsets, sizes and nulls
159+
// buffers. Create container for underlying key and value elements to create
160+
// child vectors.
161+
std::vector<Variant> keys;
162+
std::vector<Variant> values;
163+
vector_size_t index = 0;
164+
vector_size_t nullCount = 0;
165+
for (size_t i = 0; i < data.size(); ++i) {
166+
auto isNull = data[i].isNull();
167+
*rawOffsets++ = index;
168+
*rawSizes++ = !isNull ? data[i].map().size() : 0;
169+
if (isNull) {
170+
++nullCount;
171+
bits::setNull(rawNulls, i, true);
172+
continue;
173+
}
174+
for (const auto& [key, value] : data[i].map()) {
175+
keys.push_back(key);
176+
values.push_back(value);
177+
++index;
178+
}
179+
}
55180

56-
return std::make_shared<ArrayVector>(
57-
pool, arrayType, nullptr, 1, offsets, sizes, arrayElements);
181+
// Create keys and values vector with corresponding values.
182+
auto keysVector = callMakeVector(type->childAt(0), keys, pool);
183+
auto valuesVector = callMakeVector(type->childAt(1), values, pool);
184+
185+
return std::make_shared<MapVector>(
186+
pool,
187+
type,
188+
nulls,
189+
size,
190+
offsets,
191+
sizes,
192+
std::move(keysVector),
193+
std::move(valuesVector),
194+
nullCount);
195+
}
196+
};
197+
198+
template <>
199+
struct VariantToVector<TypeKind::ROW> {
200+
static VectorPtr makeVector(
201+
TypePtr type,
202+
const std::vector<Variant>& data,
203+
memory::MemoryPool* pool) {
204+
vector_size_t size = data.size();
205+
BufferPtr nulls = allocateNulls(size, pool);
206+
auto rawNulls = nulls->asMutable<uint64_t>();
207+
208+
auto childCount = type->size();
209+
std::vector<std::vector<Variant>> children;
210+
children.reserve(childCount);
211+
for (size_t i = 0; i < childCount; ++i) {
212+
std::vector<Variant> child;
213+
child.reserve(size);
214+
children.push_back(child);
215+
}
216+
217+
// Populate data for each of the columns.
218+
for (size_t i = 0; i < data.size(); ++i) {
219+
if (data[i].isNull()) {
220+
bits::setNull(rawNulls, i, true);
221+
continue;
222+
}
223+
const auto& row = data[i].row();
224+
VELOX_CHECK_EQ(row.size(), children.size());
225+
for (size_t j = 0; j < row.size(); ++j) {
226+
children[j].push_back(row[j]);
227+
}
228+
}
229+
230+
std::vector<VectorPtr> childVectors;
231+
childVectors.reserve(childCount);
232+
for (size_t i = 0; i < childCount; ++i) {
233+
// @lint-ignore CLANGTIDY facebook-hte-LocalUncheckedArrayBounds
234+
childVectors.push_back(
235+
callMakeVector(type->childAt(i), children[i], pool));
236+
}
237+
238+
return std::make_shared<RowVector>(pool, type, nulls, size, childVectors);
239+
}
240+
};
241+
242+
VectorPtr callMakeVector(
243+
TypePtr type,
244+
const std::vector<Variant>& data,
245+
memory::MemoryPool* pool) {
246+
return VELOX_DYNAMIC_TYPE_DISPATCH_METHOD_ALL(
247+
VariantToVector, makeVector, type->kind(), type, data, pool);
58248
}
59-
} // namespace
60249

61-
ArrayVectorPtr variantArrayToVector(
62-
const TypePtr& arrayType,
63-
const std::vector<Variant>& variantArray,
64-
velox::memory::MemoryPool* pool) {
65-
VELOX_CHECK_EQ(TypeKind::ARRAY, arrayType->kind());
250+
} // namespace
66251

67-
if (arrayType->childAt(0)->isUnKnown()) {
68-
return variantArrayToVectorImpl<TypeKind::UNKNOWN>(
69-
arrayType, variantArray, pool);
252+
VectorPtr variantToVector(
253+
const TypePtr& type,
254+
const Variant& value,
255+
memory::MemoryPool* pool) {
256+
if (value.isNull()) {
257+
return BaseVector::createNullConstant(type, 1, pool);
258+
} else if (type->isPrimitiveType()) {
259+
return BaseVector::createConstant(type, value, 1, pool);
70260
}
71261

72-
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
73-
variantArrayToVectorImpl,
74-
arrayType->childAt(0)->kind(),
75-
arrayType,
76-
variantArray,
77-
pool);
262+
auto variantVector = callMakeVector(type, {value}, pool);
263+
return BaseVector::wrapInConstant(1, 0, std::move(variantVector));
78264
}
79265

80-
} // namespace facebook::velox::core
266+
} // namespace facebook::velox

velox/vector/VariantToVector.h

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,21 @@
1515
*/
1616
#pragma once
1717

18-
#include "velox/vector/ComplexVector.h"
18+
#include "velox/vector/BaseVector.h"
1919

20-
namespace facebook::velox::core {
20+
namespace facebook::velox {
2121

22-
// Converts a sequence of values from a variant array to an ArrayVector. The
23-
// output ArrayVector contains one single row, which contains the elements
24-
// extracted from the input variant vector.
25-
ArrayVectorPtr variantArrayToVector(
26-
const TypePtr& arrayType,
27-
const std::vector<Variant>& variantArray,
28-
velox::memory::MemoryPool* pool);
22+
// Converts Variant `value` into a Velox vector using specified type.
23+
//
24+
// Supports all primitive types and complex types that do not contain DECIMAL
25+
// types.
26+
//
27+
// @returns ConstantVector of size 1.
28+
//
29+
// TODO Fold into BaseVector::createConstant API.
30+
VectorPtr variantToVector(
31+
const TypePtr& type,
32+
const Variant& value,
33+
memory::MemoryPool* pool);
2934

30-
} // namespace facebook::velox::core
35+
} // namespace facebook::velox

0 commit comments

Comments
 (0)