Skip to content

Commit f2cf163

Browse files
committed
Fix $files partition column construction
During partition column construction, the referenced field(s) were not in the correct order which could cause IllegalArgumentException. The FilesTablePageSource was updated to use field id look ups instead of relying solely on order.
1 parent b3310ba commit f2cf163

File tree

3 files changed

+52
-9
lines changed

3 files changed

+52
-9
lines changed

plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/StructLikeWrapperWithFieldIdToIndex.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import com.google.common.annotations.VisibleForTesting;
1717
import com.google.common.collect.ImmutableMap;
1818
import org.apache.iceberg.FileScanTask;
19+
import org.apache.iceberg.StructLike;
1920
import org.apache.iceberg.types.Types;
2021
import org.apache.iceberg.util.StructLikeWrapper;
2122

@@ -31,8 +32,12 @@ public class StructLikeWrapperWithFieldIdToIndex
3132

3233
public static StructLikeWrapperWithFieldIdToIndex createStructLikeWrapper(FileScanTask fileScanTask)
3334
{
34-
Types.StructType structType = fileScanTask.spec().partitionType();
35-
StructLikeWrapper partitionWrapper = StructLikeWrapper.forType(structType).set(fileScanTask.file().partition());
35+
return createStructLikeWrapper(fileScanTask.spec().partitionType(), fileScanTask.file().partition());
36+
}
37+
38+
public static StructLikeWrapperWithFieldIdToIndex createStructLikeWrapper(Types.StructType structType, StructLike partition)
39+
{
40+
StructLikeWrapper partitionWrapper = StructLikeWrapper.forType(structType).set(partition);
3641
return new StructLikeWrapperWithFieldIdToIndex(partitionWrapper, structType);
3742
}
3843

plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/system/files/FilesTablePageSource.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import io.airlift.slice.Slices;
1818
import io.trino.filesystem.TrinoFileSystem;
1919
import io.trino.plugin.iceberg.IcebergUtil;
20+
import io.trino.plugin.iceberg.StructLikeWrapperWithFieldIdToIndex;
2021
import io.trino.plugin.iceberg.fileio.ForwardingFileIoFactory;
2122
import io.trino.plugin.iceberg.system.FilesTable;
2223
import io.trino.plugin.iceberg.system.IcebergPartitionColumn;
@@ -63,6 +64,7 @@
6364
import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino;
6465
import static io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes;
6566
import static io.trino.plugin.iceberg.IcebergUtil.readerForManifest;
67+
import static io.trino.plugin.iceberg.StructLikeWrapperWithFieldIdToIndex.createStructLikeWrapper;
6668
import static io.trino.plugin.iceberg.system.FilesTable.COLUMN_SIZES_COLUMN_NAME;
6769
import static io.trino.plugin.iceberg.system.FilesTable.CONTENT_COLUMN_NAME;
6870
import static io.trino.plugin.iceberg.system.FilesTable.EQUALITY_IDS_COLUMN_NAME;
@@ -101,6 +103,7 @@ public final class FilesTablePageSource
101103
private final Schema schema;
102104
private final Schema metadataSchema;
103105
private final Map<Integer, PrimitiveType> idToTypeMapping;
106+
private final Map<Integer, PartitionSpec> idToPartitionSpecMapping;
104107
private final List<PartitionField> partitionFields;
105108
private final Optional<IcebergPartitionColumn> partitionColumnType;
106109
private final List<Types.NestedField> primitiveFields;
@@ -123,15 +126,15 @@ public FilesTablePageSource(
123126
this.schema = SchemaParser.fromJson(requireNonNull(split.schemaJson(), "schema is null"));
124127
this.metadataSchema = SchemaParser.fromJson(requireNonNull(split.metadataTableJson(), "metadataSchema is null"));
125128
this.idToTypeMapping = primitiveFieldTypes(schema);
126-
Map<Integer, PartitionSpec> specs = split.partitionSpecsByIdJson().entrySet().stream().collect(toImmutableMap(
129+
this.idToPartitionSpecMapping = split.partitionSpecsByIdJson().entrySet().stream().collect(toImmutableMap(
127130
Map.Entry::getKey,
128-
entry -> PartitionSpecParser.fromJson(SchemaParser.fromJson(split.schemaJson()), entry.getValue())));
129-
this.partitionFields = getAllPartitionFields(schema, specs);
131+
entry -> PartitionSpecParser.fromJson(schema, entry.getValue())));
132+
this.partitionFields = getAllPartitionFields(schema, idToPartitionSpecMapping);
130133
this.partitionColumnType = getPartitionColumnType(typeManager, partitionFields, schema);
131134
this.primitiveFields = IcebergUtil.primitiveFields(schema).stream()
132135
.sorted(Comparator.comparing(Types.NestedField::name))
133136
.collect(toImmutableList());
134-
ManifestReader<? extends ContentFile<?>> manifestReader = closer.register(readerForManifest(split.manifestFile(), fileIoFactory.create(trinoFileSystem), specs));
137+
ManifestReader<? extends ContentFile<?>> manifestReader = closer.register(readerForManifest(split.manifestFile(), fileIoFactory.create(trinoFileSystem), idToPartitionSpecMapping));
135138
// TODO figure out why selecting the specific column causes null to be returned for offset_splits
136139
this.contentIterator = closer.register(requireNonNull(manifestReader, "manifestReader is null").iterator());
137140
this.pageBuilder = new PageBuilder(requiredColumns.stream().map(column -> {
@@ -195,20 +198,26 @@ public SourcePage getNextSourcePage()
195198
writeValueOrNull(pageBuilder, SPEC_ID_COLUMN_NAME, contentFile::specId, INTEGER::writeInt);
196199
// partitions
197200
if (partitionColumnType.isPresent() && columnNameToIndex.containsKey(FilesTable.PARTITION_COLUMN_NAME)) {
201+
PartitionSpec partitionSpec = idToPartitionSpecMapping.get(contentFile.specId());
202+
StructLikeWrapperWithFieldIdToIndex partitionStruct = createStructLikeWrapper(partitionSpec.partitionType(), contentFile.partition());
198203
List<Type> partitionTypes = partitionTypes(partitionFields, idToTypeMapping);
204+
List<? extends Class<?>> partitionColumnClass = partitionTypes.stream()
205+
.map(type -> type.typeId().javaClass())
206+
.collect(toImmutableList());
199207
List<io.trino.spi.type.Type> partitionColumnTypes = partitionColumnType.orElseThrow().rowType().getFields().stream()
200208
.map(RowType.Field::getType)
201209
.collect(toImmutableList());
202210

203211
if (pageBuilder.getBlockBuilder(columnNameToIndex.get(FilesTable.PARTITION_COLUMN_NAME)) instanceof RowBlockBuilder rowBlockBuilder) {
204212
rowBlockBuilder.buildEntry(fields -> {
205213
for (int i = 0; i < partitionColumnTypes.size(); i++) {
206-
Type type = partitionTypes.get(i);
207214
io.trino.spi.type.Type trinoType = partitionColumnType.get().rowType().getFields().get(i).getType();
208215
Object value = null;
209216
Integer fieldId = partitionColumnType.get().fieldIds().get(i);
210-
if (fieldId != null) {
211-
value = convertIcebergValueToTrino(type, contentFile.partition().get(i, type.typeId().javaClass()));
217+
if (partitionStruct.getFieldIdToIndex().containsKey(fieldId)) {
218+
value = convertIcebergValueToTrino(
219+
partitionTypes.get(i),
220+
partitionStruct.getStructLikeWrapper().get().get(partitionStruct.getFieldIdToIndex().get(fieldId), partitionColumnClass.get(i)));
212221
}
213222
writeNativeValue(trinoType, fields.get(i), value);
214223
}

plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergPartitionEvolution.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package io.trino.plugin.iceberg;
1515

1616
import com.google.common.collect.ImmutableList;
17+
import com.google.common.collect.Lists;
1718
import io.trino.testing.AbstractTestQueryFramework;
1819
import io.trino.testing.MaterializedRow;
1920
import io.trino.testing.QueryRunner;
@@ -39,6 +40,34 @@ protected QueryRunner createQueryRunner()
3940
.build();
4041
}
4142

43+
@Test
44+
public void testSelectPartition()
45+
{
46+
String tableName = "test_select_partition_from_files" + randomNameSuffix();
47+
assertUpdate("CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['regionkey', 'truncate(name, 1)']) AS SELECT * FROM nation WHERE nationkey <= 2", 3);
48+
assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES partitioning = DEFAULT");
49+
assertUpdate("INSERT INTO " + tableName + " SELECT * FROM nation WHERE nationkey <= 2", 3);
50+
assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES partitioning = ARRAY['nationkey', 'regionkey', 'truncate(name, 1)']");
51+
assertUpdate("INSERT INTO " + tableName + " SELECT * FROM nation WHERE nationkey <= 2", 3);
52+
53+
List<MaterializedRow> files = computeActual("SELECT partition FROM \"" + tableName + "$files\"").getMaterializedRows();
54+
assertThat(files).hasSize(7);
55+
List<List<Object>> partitionColumn = files.stream().map(materializedRow -> ((MaterializedRow) materializedRow.getField(0)).getFields()).toList();
56+
assertThat(partitionColumn).containsExactlyInAnyOrder(
57+
// ['regionkey', 'truncate(name, 1)']
58+
Lists.newArrayList(1L, "B", null),
59+
Lists.newArrayList(0L, "A", null),
60+
Lists.newArrayList(1L, "A", null),
61+
// DEFAULT
62+
Lists.newArrayList(null, null, null),
63+
// ['nationkey', 'regionkey', 'truncate(name, 1)']
64+
Lists.newArrayList(1L, "B", 2L),
65+
Lists.newArrayList(0L, "A", 0L),
66+
Lists.newArrayList(1L, "A", 1L));
67+
assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation");
68+
assertUpdate("DROP TABLE " + tableName);
69+
}
70+
4271
@Test
4372
public void testRemovePartitioning()
4473
{

0 commit comments

Comments
 (0)