Skip to content

Commit c816a39

Browse files
committed
[OAP][5962] Support struct schema evolution matching by name
1 parent 2337127 commit c816a39

18 files changed

+395
-37
lines changed

velox/connectors/hive/SplitReader.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -439,12 +439,18 @@ std::vector<TypePtr> SplitReader::adaptColumns(
439439
auto fileTypeIdx = fileType->getChildIdxIfExists(fieldName);
440440
if (!fileTypeIdx.has_value()) {
441441
// Column is missing. Most likely due to schema evolution.
442-
VELOX_CHECK(tableSchema, "Unable to resolve column '{}'", fieldName);
442+
auto outputTypeIdx = readerOutputType_->getChildIdxIfExists(fieldName);
443+
TypePtr fieldType;
444+
if (outputTypeIdx.has_value()) {
445+
// Field name exists in the user-specified output type.
446+
fieldType = readerOutputType_->childAt(outputTypeIdx.value());
447+
} else {
448+
VELOX_CHECK(tableSchema, "Unable to resolve column '{}'", fieldName);
449+
fieldType = tableSchema->findChild(fieldName);
450+
}
443451
childSpec->setConstantValue(
444452
BaseVector::createNullConstant(
445-
tableSchema->findChild(fieldName),
446-
1,
447-
connectorQueryCtx_->memoryPool()));
453+
fieldType, 1, connectorQueryCtx_->memoryPool()));
448454
} else {
449455
// Column no longer missing, reset constant value set on the spec.
450456
childSpec->setConstantValue(nullptr);

velox/dwio/common/ScanSpec.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ bool ScanSpec::hasFilter() const {
157157
if (hasFilter_.has_value()) {
158158
return hasFilter_.value();
159159
}
160-
if (!isConstant() && filter()) {
160+
if (filter()) {
161161
hasFilter_ = true;
162162
return true;
163163
}

velox/dwio/common/SelectiveFlatMapColumnReader.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@ namespace facebook::velox::dwio::common {
2424
class SelectiveFlatMapColumnReader : public SelectiveStructColumnReaderBase {
2525
protected:
2626
SelectiveFlatMapColumnReader(
27+
const dwio::common::ColumnReaderOptions& columnReaderOptions,
2728
const TypePtr& requestedType,
2829
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
2930
FormatParams& params,
3031
velox::common::ScanSpec& scanSpec)
3132
: SelectiveStructColumnReaderBase(
33+
columnReaderOptions,
3234
requestedType,
3335
fileType,
3436
params,

velox/dwio/common/SelectiveStructColumnReader.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,6 @@ void SelectiveStructColumnReaderBase::read(
425425
}
426426

427427
const auto& childSpecs = scanSpec_->children();
428-
VELOX_CHECK(!childSpecs.empty());
429428
for (size_t i = 0; i < childSpecs.size(); ++i) {
430429
const auto& childSpec = childSpecs[i];
431430
VELOX_TRACE_HISTORY_PUSH("read %s", childSpec->fieldName().c_str());
@@ -525,9 +524,12 @@ bool SelectiveStructColumnReaderBase::isChildMissing(
525524
// row type that doesn't exist
526525
// in the output.
527526
fileType_->type()->kind() !=
528-
TypeKind::MAP && // If this is the case it means this is a flat map,
529-
// so it can't have "missing" fields.
530-
childSpec.channel() >= fileType_->size());
527+
TypeKind::MAP // If this is the case it means this is a flat map,
528+
// so it can't have "missing" fields.
529+
) &&
530+
(columnReaderOptions_.useColumnNamesForColumnMapping_
531+
? !asRowType(fileType_->type())->containsChild(childSpec.fieldName())
532+
: childSpec.channel() >= fileType_->size());
531533
}
532534

533535
std::unique_ptr<velox::dwio::common::ColumnLoader>
@@ -539,7 +541,6 @@ SelectiveStructColumnReaderBase::makeColumnLoader(vector_size_t index) {
539541
void SelectiveStructColumnReaderBase::getValues(
540542
const RowSet& rows,
541543
VectorPtr* result) {
542-
VELOX_CHECK(!scanSpec_->children().empty());
543544
VELOX_CHECK_NOT_NULL(
544545
*result, "SelectiveStructColumnReaderBase expects a non-null result");
545546
VELOX_CHECK(

velox/dwio/common/SelectiveStructColumnReader.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#pragma once
1818

19+
#include "velox/dwio/common/Options.h"
1920
#include "velox/dwio/common/SelectiveColumnReaderInternal.h"
2021

2122
namespace facebook::velox::dwio::common {
@@ -113,13 +114,15 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
113114
static constexpr int32_t kConstantChildSpecSubscript{-1};
114115

115116
SelectiveStructColumnReaderBase(
117+
const dwio::common::ColumnReaderOptions& columnReaderOptions,
116118
const TypePtr& requestedType,
117119
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
118120
FormatParams& params,
119121
velox::common::ScanSpec& scanSpec,
120122
bool isRoot = false,
121123
bool generateLazyChildren = true)
122124
: SelectiveColumnReader(requestedType, fileType, params, scanSpec),
125+
columnReaderOptions_(columnReaderOptions),
123126
debugString_(
124127
getExceptionContext().message(VeloxException::Type::kSystem)),
125128
isRoot_(isRoot),
@@ -180,6 +183,8 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
180183
}
181184
}
182185

186+
const dwio::common::ColumnReaderOptions& columnReaderOptions_;
187+
183188
// Context information obtained from ExceptionContext. Stored here
184189
// so that LazyVector readers under this can add this to their
185190
// ExceptionContext. Allows contextualizing reader errors to split

velox/dwio/dwrf/reader/DwrfReader.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,13 +320,14 @@ DwrfRowReader::DwrfRowReader(
320320
makeProjectedNodes(*getReader().schemaWithId(), *projectedNodes_);
321321
}
322322

323+
// Reader options must be configured before calling 'getUnitLoader()',
324+
// which triggers 'SelectiveDwrfReader::build'.
325+
columnReaderOptions_ = dwio::common::makeColumnReaderOptions(
326+
readerBaseShared()->readerOptions());
323327
unitLoader_ = getUnitLoader();
324328
if (!emptyFile()) {
325329
getReader().loadCache();
326330
}
327-
328-
columnReaderOptions_ = dwio::common::makeColumnReaderOptions(
329-
readerBaseShared()->readerOptions());
330331
}
331332

332333
std::unique_ptr<ColumnReader>& DwrfRowReader::getColumnReader() {

velox/dwio/dwrf/reader/SelectiveFlatMapColumnReader.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ class SelectiveFlatMapAsStructReader : public SelectiveStructColumnReaderBase {
203203
DwrfParams& params,
204204
common::ScanSpec& scanSpec)
205205
: SelectiveStructColumnReaderBase(
206+
columnReaderOptions,
206207
requestedType,
207208
fileType,
208209
params,
@@ -242,6 +243,7 @@ class SelectiveFlatMapAsMapReader : public SelectiveStructColumnReaderBase {
242243
DwrfParams& params,
243244
common::ScanSpec& scanSpec)
244245
: SelectiveStructColumnReaderBase(
246+
columnReaderOptions,
245247
requestedType,
246248
fileType,
247249
params,
@@ -286,6 +288,7 @@ class SelectiveFlatMapReader
286288
DwrfParams& params,
287289
common::ScanSpec& scanSpec)
288290
: dwio::common::SelectiveFlatMapColumnReader(
291+
columnReaderOptions,
289292
requestedType,
290293
fileType,
291294
params,

velox/dwio/dwrf/reader/SelectiveStructColumnReader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ SelectiveStructColumnReader::SelectiveStructColumnReader(
3131
common::ScanSpec& scanSpec,
3232
bool isRoot)
3333
: SelectiveStructColumnReaderBase(
34+
columnReaderOptions,
3435
requestedType,
3536
fileType,
3637
params,

velox/dwio/dwrf/reader/SelectiveStructColumnReader.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@ class SelectiveStructColumnReaderBase
2525
: public dwio::common::SelectiveStructColumnReaderBase {
2626
public:
2727
SelectiveStructColumnReaderBase(
28+
const dwio::common::ColumnReaderOptions& columnReaderOptions,
2829
const TypePtr& requestedType,
2930
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
3031
DwrfParams& params,
3132
common::ScanSpec& scanSpec,
3233
bool isRoot = false,
3334
bool generateLazyChildren = true)
3435
: dwio::common::SelectiveStructColumnReaderBase(
36+
columnReaderOptions,
3537
requestedType,
3638
fileType,
3739
params,

velox/dwio/parquet/reader/ParquetColumnReader.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ std::unique_ptr<dwio::common::SelectiveColumnReader> ParquetColumnReader::build(
3838
const TypePtr& requestedType,
3939
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
4040
ParquetParams& params,
41-
common::ScanSpec& scanSpec) {
41+
common::ScanSpec& scanSpec,
42+
memory::MemoryPool& pool) {
4243
auto colName = scanSpec.fieldName();
4344

4445
switch (fileType->type()->kind()) {
@@ -59,7 +60,7 @@ std::unique_ptr<dwio::common::SelectiveColumnReader> ParquetColumnReader::build(
5960

6061
case TypeKind::ROW:
6162
return std::make_unique<StructColumnReader>(
62-
columnReaderOptions, requestedType, fileType, params, scanSpec);
63+
columnReaderOptions, requestedType, fileType, params, scanSpec, pool);
6364

6465
case TypeKind::VARBINARY:
6566
case TypeKind::VARCHAR:
@@ -68,12 +69,11 @@ std::unique_ptr<dwio::common::SelectiveColumnReader> ParquetColumnReader::build(
6869
case TypeKind::ARRAY: {
6970
VELOX_CHECK(requestedType->isArray(), "Requested type must be array");
7071
return std::make_unique<ListColumnReader>(
71-
columnReaderOptions, requestedType, fileType, params, scanSpec);
72+
columnReaderOptions, requestedType, fileType, params, scanSpec, pool);
7273
}
73-
7474
case TypeKind::MAP:
7575
return std::make_unique<MapColumnReader>(
76-
columnReaderOptions, requestedType, fileType, params, scanSpec);
76+
columnReaderOptions, requestedType, fileType, params, scanSpec, pool);
7777

7878
case TypeKind::BOOLEAN:
7979
return std::make_unique<BooleanColumnReader>(

0 commit comments

Comments
 (0)