|
15 | 15 | */ |
16 | 16 |
|
17 | 17 | #include "velox/vector/VariantToVector.h" |
| 18 | +#include "velox/vector/ComplexVector.h" |
18 | 19 | #include "velox/vector/FlatVector.h" |
19 | 20 |
|
20 | | -namespace facebook::velox::core { |
| 21 | +namespace facebook::velox { |
21 | 22 | namespace { |
22 | 23 |
|
| 24 | +VectorPtr callMakeVector( |
| 25 | + TypePtr type, |
| 26 | + const std::vector<Variant>& data, |
| 27 | + memory::MemoryPool* pool); |
| 28 | + |
| 29 | +template <TypeKind KIND, typename = void> |
| 30 | +struct VariantToVector { |
| 31 | + static VectorPtr makeVector( |
| 32 | + TypePtr type, |
| 33 | + const std::vector<Variant>& /*data*/, |
| 34 | + memory::MemoryPool* /*pool*/) { |
| 35 | + VELOX_NYI("Type not supported: {}", type->toString()); |
| 36 | + } |
| 37 | +}; |
| 38 | + |
| 39 | +template <> |
| 40 | +struct VariantToVector<TypeKind::HUGEINT> { |
| 41 | + static VectorPtr makeVector( |
| 42 | + TypePtr type, |
| 43 | + const std::vector<Variant>& /*data*/, |
| 44 | + memory::MemoryPool* /*pool*/) { |
| 45 | + VELOX_NYI("Type not supported: {}", type->toString()); |
| 46 | + } |
| 47 | +}; |
| 48 | + |
23 | 49 | template <TypeKind KIND> |
24 | | -ArrayVectorPtr variantArrayToVectorImpl( |
25 | | - const TypePtr& arrayType, |
26 | | - const std::vector<Variant>& variantArray, |
27 | | - velox::memory::MemoryPool* pool) { |
28 | | - using T = typename TypeTraits<KIND>::NativeType; |
29 | | - |
30 | | - // First generate internal arrayVector elements. |
31 | | - const size_t variantArraySize = variantArray.size(); |
32 | | - |
33 | | - // Create array elements flat vector. |
34 | | - auto arrayElements = BaseVector::create<FlatVector<T>>( |
35 | | - arrayType->childAt(0), variantArraySize, pool); |
36 | | - |
37 | | - // Populate internal array elements (flat vector). |
38 | | - for (vector_size_t i = 0; i < variantArraySize; i++) { |
39 | | - const auto& value = variantArray[i]; |
40 | | - if (!value.isNull()) { |
41 | | - // `getOwnedValue` copies the content to its internal buffers (in case of |
42 | | - // string/StringView); no-op for other primitive types. |
43 | | - arrayElements->set(i, T(value.value<KIND>())); |
44 | | - } else { |
45 | | - arrayElements->setNull(i, true); |
| 50 | +struct VariantToVector< |
| 51 | + KIND, |
| 52 | + std::enable_if_t< |
| 53 | + TypeTraits<KIND>::isFixedWidth || KIND == TypeKind::VARCHAR || |
| 54 | + KIND == TypeKind::VARBINARY || KIND == TypeKind::OPAQUE, |
| 55 | + void>> { |
| 56 | + static constexpr bool kIsOpaque = (KIND == TypeKind::OPAQUE); |
| 57 | + static VectorPtr makeVector( |
| 58 | + TypePtr type, |
| 59 | + const std::vector<Variant>& data, |
| 60 | + memory::MemoryPool* pool) { |
| 61 | + using T = typename TypeTraits<KIND>::NativeType; |
| 62 | + |
| 63 | + // Allocate nulls and data buffers and set all values to null by default. |
| 64 | + const vector_size_t dataSize = data.size(); |
| 65 | + BufferPtr valuesBuffer = AlignedBuffer::allocate<T>(dataSize, pool); |
| 66 | + BufferPtr nulls = allocateNulls(dataSize, pool, bits::kNull); |
| 67 | + |
| 68 | + // Create flat vector to store all the values. |
| 69 | + auto values = std::make_shared<FlatVector<T>>( |
| 70 | + pool, |
| 71 | + type, |
| 72 | + nulls, |
| 73 | + dataSize, |
| 74 | + std::move(valuesBuffer), |
| 75 | + std::vector<BufferPtr>()); |
| 76 | + |
| 77 | + // Populate data into flat vector. |
| 78 | + for (size_t i = 0; i < dataSize; i++) { |
| 79 | + if (!data[i].isNull()) { |
| 80 | + if constexpr (kIsOpaque) { |
| 81 | + values->set(i, T(data[i].value<KIND>().obj)); |
| 82 | + } else { |
| 83 | + values->set(i, T(data[i].value<KIND>())); |
| 84 | + } |
| 85 | + } |
46 | 86 | } |
| 87 | + return values; |
47 | 88 | } |
| 89 | +}; |
48 | 90 |
|
49 | | - // Create ArrayVector around the FlatVector containing array elements. |
50 | | - BufferPtr offsets = allocateOffsets(1, pool); |
51 | | - BufferPtr sizes = allocateSizes(1, pool); |
| 91 | +template <> |
| 92 | +struct VariantToVector<TypeKind::ARRAY> { |
| 93 | + static VectorPtr makeVector( |
| 94 | + TypePtr type, |
| 95 | + const std::vector<Variant>& data, |
| 96 | + memory::MemoryPool* pool) { |
| 97 | + // Create offsets, sizes and nulls buffers. |
| 98 | + vector_size_t size = data.size(); |
| 99 | + BufferPtr offsets = allocateOffsets(size, pool); |
| 100 | + BufferPtr sizes = allocateSizes(size, pool); |
| 101 | + BufferPtr nulls = allocateNulls(size, pool); |
| 102 | + auto rawOffsets = offsets->asMutable<vector_size_t>(); |
| 103 | + auto rawSizes = sizes->asMutable<vector_size_t>(); |
| 104 | + auto rawNulls = nulls->asMutable<uint64_t>(); |
52 | 105 |
|
53 | | - auto rawSizes = sizes->asMutable<vector_size_t>(); |
54 | | - rawSizes[0] = variantArraySize; |
| 106 | + // Iterate through array values and set values in offsets and sizes buffers. |
| 107 | + // Create container for underlying array elements to create an elements |
| 108 | + // vector. |
| 109 | + std::vector<Variant> elements; |
| 110 | + vector_size_t index = 0; |
| 111 | + vector_size_t nullCount = 0; |
| 112 | + for (size_t i = 0; i < data.size(); ++i) { |
| 113 | + auto isNull = data[i].isNull(); |
| 114 | + *rawOffsets++ = index; |
| 115 | + *rawSizes++ = !isNull ? data[i].array().size() : 0; |
| 116 | + if (isNull) { |
| 117 | + ++nullCount; |
| 118 | + bits::setNull(rawNulls, i, true); |
| 119 | + continue; |
| 120 | + } |
| 121 | + for (const auto& arrayElement : data[i].array()) { |
| 122 | + elements.push_back(arrayElement); |
| 123 | + ++index; |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + // Create child elements vector with all the array values. |
| 128 | + TypePtr elementType = type->childAt(0); |
| 129 | + auto elementsVector = callMakeVector(elementType, elements, pool); |
| 130 | + |
| 131 | + return std::make_shared<ArrayVector>( |
| 132 | + pool, |
| 133 | + type, |
| 134 | + nulls, |
| 135 | + size, |
| 136 | + offsets, |
| 137 | + sizes, |
| 138 | + std::move(elementsVector), |
| 139 | + nullCount); |
| 140 | + } |
| 141 | +}; |
| 142 | + |
| 143 | +template <> |
| 144 | +struct VariantToVector<TypeKind::MAP> { |
| 145 | + static VectorPtr makeVector( |
| 146 | + TypePtr type, |
| 147 | + const std::vector<Variant>& data, |
| 148 | + memory::MemoryPool* pool) { |
| 149 | + // Create offsets, sizes and nulls buffers. |
| 150 | + vector_size_t size = data.size(); |
| 151 | + BufferPtr offsets = allocateOffsets(size, pool); |
| 152 | + BufferPtr sizes = allocateSizes(size, pool); |
| 153 | + BufferPtr nulls = allocateNulls(size, pool); |
| 154 | + auto rawOffsets = offsets->asMutable<vector_size_t>(); |
| 155 | + auto rawSizes = sizes->asMutable<vector_size_t>(); |
| 156 | + auto rawNulls = nulls->asMutable<uint64_t>(); |
| 157 | + |
| 158 | + // Iterate through map (key,value) pairs and set offsets, sizes and nulls |
| 159 | + // buffers. Create container for underlying key and value elements to create |
| 160 | + // child vectors. |
| 161 | + std::vector<Variant> keys; |
| 162 | + std::vector<Variant> values; |
| 163 | + vector_size_t index = 0; |
| 164 | + vector_size_t nullCount = 0; |
| 165 | + for (size_t i = 0; i < data.size(); ++i) { |
| 166 | + auto isNull = data[i].isNull(); |
| 167 | + *rawOffsets++ = index; |
| 168 | + *rawSizes++ = !isNull ? data[i].map().size() : 0; |
| 169 | + if (isNull) { |
| 170 | + ++nullCount; |
| 171 | + bits::setNull(rawNulls, i, true); |
| 172 | + continue; |
| 173 | + } |
| 174 | + for (const auto& [key, value] : data[i].map()) { |
| 175 | + keys.push_back(key); |
| 176 | + values.push_back(value); |
| 177 | + ++index; |
| 178 | + } |
| 179 | + } |
55 | 180 |
|
56 | | - return std::make_shared<ArrayVector>( |
57 | | - pool, arrayType, nullptr, 1, offsets, sizes, arrayElements); |
| 181 | + // Create keys and values vector with corresponding values. |
| 182 | + auto keysVector = callMakeVector(type->childAt(0), keys, pool); |
| 183 | + auto valuesVector = callMakeVector(type->childAt(1), values, pool); |
| 184 | + |
| 185 | + return std::make_shared<MapVector>( |
| 186 | + pool, |
| 187 | + type, |
| 188 | + nulls, |
| 189 | + size, |
| 190 | + offsets, |
| 191 | + sizes, |
| 192 | + std::move(keysVector), |
| 193 | + std::move(valuesVector), |
| 194 | + nullCount); |
| 195 | + } |
| 196 | +}; |
| 197 | + |
| 198 | +template <> |
| 199 | +struct VariantToVector<TypeKind::ROW> { |
| 200 | + static VectorPtr makeVector( |
| 201 | + TypePtr type, |
| 202 | + const std::vector<Variant>& data, |
| 203 | + memory::MemoryPool* pool) { |
| 204 | + vector_size_t size = data.size(); |
| 205 | + BufferPtr nulls = allocateNulls(size, pool); |
| 206 | + auto rawNulls = nulls->asMutable<uint64_t>(); |
| 207 | + |
| 208 | + auto childCount = type->size(); |
| 209 | + std::vector<std::vector<Variant>> children; |
| 210 | + children.reserve(childCount); |
| 211 | + for (size_t i = 0; i < childCount; ++i) { |
| 212 | + std::vector<Variant> child; |
| 213 | + child.reserve(size); |
| 214 | + children.push_back(child); |
| 215 | + } |
| 216 | + |
| 217 | + // Populate data for each of the columns. |
| 218 | + for (size_t i = 0; i < data.size(); ++i) { |
| 219 | + if (data[i].isNull()) { |
| 220 | + bits::setNull(rawNulls, i, true); |
| 221 | + continue; |
| 222 | + } |
| 223 | + const auto& row = data[i].row(); |
| 224 | + VELOX_CHECK_EQ(row.size(), children.size()); |
| 225 | + for (size_t j = 0; j < row.size(); ++j) { |
| 226 | + children[j].push_back(row[j]); |
| 227 | + } |
| 228 | + } |
| 229 | + |
| 230 | + std::vector<VectorPtr> childVectors; |
| 231 | + childVectors.reserve(childCount); |
| 232 | + for (size_t i = 0; i < childCount; ++i) { |
| 233 | + // @lint-ignore CLANGTIDY facebook-hte-LocalUncheckedArrayBounds |
| 234 | + childVectors.push_back( |
| 235 | + callMakeVector(type->childAt(i), children[i], pool)); |
| 236 | + } |
| 237 | + |
| 238 | + return std::make_shared<RowVector>(pool, type, nulls, size, childVectors); |
| 239 | + } |
| 240 | +}; |
| 241 | + |
| 242 | +VectorPtr callMakeVector( |
| 243 | + TypePtr type, |
| 244 | + const std::vector<Variant>& data, |
| 245 | + memory::MemoryPool* pool) { |
| 246 | + return VELOX_DYNAMIC_TYPE_DISPATCH_METHOD_ALL( |
| 247 | + VariantToVector, makeVector, type->kind(), type, data, pool); |
58 | 248 | } |
59 | | -} // namespace |
60 | 249 |
|
61 | | -ArrayVectorPtr variantArrayToVector( |
62 | | - const TypePtr& arrayType, |
63 | | - const std::vector<Variant>& variantArray, |
64 | | - velox::memory::MemoryPool* pool) { |
65 | | - VELOX_CHECK_EQ(TypeKind::ARRAY, arrayType->kind()); |
| 250 | +} // namespace |
66 | 251 |
|
67 | | - if (arrayType->childAt(0)->isUnKnown()) { |
68 | | - return variantArrayToVectorImpl<TypeKind::UNKNOWN>( |
69 | | - arrayType, variantArray, pool); |
| 252 | +VectorPtr variantToVector( |
| 253 | + const TypePtr& type, |
| 254 | + const Variant& value, |
| 255 | + memory::MemoryPool* pool) { |
| 256 | + if (value.isNull()) { |
| 257 | + return BaseVector::createNullConstant(type, 1, pool); |
| 258 | + } else if (type->isPrimitiveType()) { |
| 259 | + return BaseVector::createConstant(type, value, 1, pool); |
70 | 260 | } |
71 | 261 |
|
72 | | - return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( |
73 | | - variantArrayToVectorImpl, |
74 | | - arrayType->childAt(0)->kind(), |
75 | | - arrayType, |
76 | | - variantArray, |
77 | | - pool); |
| 262 | + auto variantVector = callMakeVector(type, {value}, pool); |
| 263 | + return BaseVector::wrapInConstant(1, 0, std::move(variantVector)); |
78 | 264 | } |
79 | 265 |
|
80 | | -} // namespace facebook::velox::core |
| 266 | +} // namespace facebook::velox |
0 commit comments