Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,22 @@ public class ColumnVector implements AutoCloseable {
this.accessor = getVectorAccessor(vectorHolder);
}

/**
 * Creates a ColumnVector that reads values through the given {@link ArrowVectorAccessor} and
 * answers null checks from the given {@link NullabilityHolder}.
 *
 * <p>No vector holder is attached to instances built this way.
 *
 * @param field the field this column belongs to; accepted but not read by this constructor
 * @param accessor the accessor to store on this column
 * @param nullabilityHolder the null tracking to use; {@code null} is replaced by {@link
 *     NullabilityHolder#ALL_NULLS}
 */
public ColumnVector(
    Types.NestedField field,
    ArrowVectorAccessor<?, String, ?, ?> accessor,
    NullabilityHolder nullabilityHolder) {
  // Some callers (e.g. entirely-null projected columns) may pass no holder at all; fall back to
  // the sentinel so the field is never left null.
  NullabilityHolder effectiveHolder = nullabilityHolder;
  if (effectiveHolder == null) {
    effectiveHolder = NullabilityHolder.ALL_NULLS;
  }
  this.nullabilityHolder = effectiveHolder;
  this.accessor = accessor;
  // This construction path has no vector holder.
  this.vectorHolder = null;
}

/**
 * Returns the potentially dict-encoded {@link FieldVector}.
 *
 * @return instance of {@link FieldVector}, or {@code null} when this column has no vector holder
 */
public FieldVector getFieldVector() {
  // The accessor-based constructor leaves vectorHolder null, so guard before dereferencing it.
  // The earlier unguarded return is removed: it made this statement unreachable.
  return vectorHolder != null ? vectorHolder.vector() : null;
}

/**
Expand All @@ -73,7 +82,7 @@ public FieldVector getFieldVector() {
* @return instance of {@link FieldVector}
*/
public FieldVector getArrowVector() {
  // The accessor-based constructor leaves vectorHolder null; without a holder there is nothing
  // to hand to the converter, so report null instead of dereferencing it.
  if (vectorHolder == null) {
    return null;
  }
  return DictEncodedArrowConverter.toArrowVector(vectorHolder, accessor);
}

public boolean hasNull() {
Expand Down Expand Up @@ -134,6 +143,10 @@ public BigDecimal getDecimal(int rowId, int precision, int scale) {
return (BigDecimal) accessor.getDecimal(rowId, precision, scale);
}

/**
 * Exposes the accessor stored on this column vector.
 *
 * @return the {@link ArrowVectorAccessor} held by this instance
 */
public ArrowVectorAccessor<?, String, ?, ?> accessor() {
  return this.accessor;
}

/**
 * Obtains the accessor for a holder by delegating to {@code ArrowVectorAccessors}.
 *
 * @param holder the vector holder to build an accessor for
 * @return whatever {@code ArrowVectorAccessors.getVectorAccessor(holder)} produces
 */
private static ArrowVectorAccessor<?, String, ?, ?> getVectorAccessor(VectorHolder holder) {
  ArrowVectorAccessor<?, String, ?, ?> resolved = ArrowVectorAccessors.getVectorAccessor(holder);
  return resolved;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.iceberg.types.Types;

/**
* This class creates typed {@link ArrowVectorAccessor} from {@link VectorHolder}. It provides a
Expand Down Expand Up @@ -877,4 +878,55 @@ private static <T> IntFunction<T[]> genericArray(Class<T> genericClass) {
private static <T> T[] genericArray(Class<T> genericClass, int length) {
  // Array.newInstance is untyped; the cast is what gives callers a T[] of the requested
  // component type and length.
  Object untyped = Array.newInstance(genericClass, length);
  return (T[]) untyped;
}

/**
 * Returns a plain (non-dictionary) accessor for the provided vector.
 *
 * <p><b>Robustness note:</b> Some projected optional columns can legitimately have no
 * materialized Arrow vector (e.g., an entirely-null column for a scan/task). In those cases
 * {@code vector} is {@code null} and the result is a {@link NullAccessor} built for the field's
 * type.
 *
 * <p>A non-null vector is rejected. This method does not decode vector contents, and answering
 * with an all-null accessor would silently hide whatever values the vector holds.
 *
 * @param vector the vector to read, or {@code null} when the column did not materialize
 * @param field the field whose type is handed to the null accessor
 * @return a {@link NullAccessor} for {@code field.type()} when {@code vector} is {@code null}
 * @throws UnsupportedOperationException if {@code vector} is not {@code null}
 */
public static ArrowVectorAccessor<?, String, ?, ?> getPlainVectorAccessor(Object vector, Types.NestedField field) {
  if (vector != null) {
    // Fail loudly rather than report a populated vector as entirely null.
    throw new UnsupportedOperationException(
        "Plain accessor for a materialized vector is not implemented: "
            + vector.getClass().getName());
  }
  // Column vector did not materialize; provide a null-producing accessor for the column's type.
  return NullAccessor.forType(field.type());
}

/** Accessor that treats the entire column as NULLs (no underlying Arrow buffers). */
static final class NullAccessor extends ArrowVectorAccessor<Object, String, Object, Object> {
private final Types.Type icebergType;

/**
 * Creates an accessor with no backing vector.
 *
 * @param icebergType the type this accessor stands in for; kept for {@link #toString()}
 */
private NullAccessor(Types.Type icebergType) {
  // There is no vector to wrap, so the superclass receives null.
  // NOTE(review): confirm that Types.Type resolves; the type may be declared outside Types.
  super(null);
  this.icebergType = icebergType;
}

/**
 * Factory for an all-null accessor of the given type.
 *
 * @param t the type the accessor stands in for
 * @return a new {@link NullAccessor} carrying {@code t}
 */
static ArrowVectorAccessor<?, String, ?, ?> forType(Types.Type t) {
  NullAccessor allNull = new NullAccessor(t);
  return allNull;
}

// Primitive typed fast-paths return boxed nulls; callers should check nullability separately.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems missing getDecimal() and others

// Every typed getter answers null regardless of rowId; callers are expected to consult
// nullability before reading a value.
// NOTE(review): these overrides compile only if the superclass declares boxed/reference return
// types with these exact signatures -- confirm against ArrowVectorAccessor.
// NOTE(review): other accessors the superclass may declare are not overridden here; confirm
// which ones a null column still needs.

/** Always {@code null}. */
@Override
public Boolean getBoolean(int rowId) {
  return null;
}

/** Always {@code null}. */
@Override
public Integer getInt(int rowId) {
  return null;
}

/** Always {@code null}. */
@Override
public Long getLong(int rowId) {
  return null;
}

/** Always {@code null}. */
@Override
public Float getFloat(int rowId) {
  return null;
}

/** Always {@code null}. */
@Override
public Double getDouble(int rowId) {
  return null;
}

/** Always {@code null}. */
@Override
public byte[] getBinary(int rowId) {
  return null;
}

/** Always {@code null}. */
@Override
public String getUTF8String(int rowId) {
  return null;
}

/**
 * Always {@code null}. Added because {@code ColumnVector.getDecimal} forwards to
 * {@code accessor.getDecimal(rowId, precision, scale)}, so a null column needs an answer here
 * too.
 */
@Override
public Object getDecimal(int rowId, int precision, int scale) {
  return null;
}

/** Renders this accessor as {@code NullAccessor(<type>)} using the stored type's string form. */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("NullAccessor(");
  text.append(icebergType);
  text.append(")");
  return text.toString();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@
* with monotonically increasing values for the index parameter.
*/
public class NullabilityHolder {
/**
 * Sentinel instance that reports every position as null.
 *
 * <p>It is built through the private no-argument constructor, which leaves the backing arrays
 * null, and overrides the four methods below so they never touch those arrays.
 *
 * <p>NOTE(review): any inherited method not overridden here would see null backing arrays --
 * confirm none of them can be reached through this shared instance.
 */
public static final NullabilityHolder ALL_NULLS =
    new NullabilityHolder() {
      @Override
      public int size() {
        return Integer.MAX_VALUE;
      }

      @Override
      public int numNulls() {
        return Integer.MAX_VALUE;
      }

      @Override
      public boolean hasNulls() {
        return true;
      }

      @Override
      public byte isNullAt(int index) {
        // 1 marks a null position; the index is ignored.
        return (byte) 1;
      }
    };

private final byte[] isNull;
private int numNulls;
private final byte[] nonNulls;
Expand All @@ -40,6 +48,13 @@ public NullabilityHolder(int size) {
Arrays.fill(nulls, (byte) 1);
}

/**
 * Builds a holder without any backing arrays. Private: it exists only so the {@link
 * #ALL_NULLS} sentinel can be instantiated.
 */
private NullabilityHolder() {
  this.nulls = null;
  this.nonNulls = null;
  this.isNull = null;
}

/**
 * Reports how many positions this holder tracks.
 *
 * @return the length of the {@code isNull} array
 */
public int size() {
  final int trackedPositions = this.isNull.length;
  return trackedPositions;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.arrow.vectorized;

import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;

/** Checks that a column with no vector is readable and reports nulls everywhere. */
public class TestNullVectorAccessor {

  /**
   * Requests a plain accessor for a null vector, wraps it in a {@link ColumnVector} built without
   * a nullability holder, and verifies that positions read as null and typed getters return
   * null.
   */
  @Test
  public void testNullVectorYieldsNullAccessor() {
    Types.NestedField optionalLong =
        Types.NestedField.optional(1, "gm_dp_identifier", Types.LongType.get());
    ArrowVectorAccessor<?, String, ?, ?> nullAccessor =
        GenericArrowVectorAccessorFactory.getPlainVectorAccessor(null, optionalLong);
    ColumnVector column = new ColumnVector(optionalLong, nullAccessor, null);

    // Must not throw, and every probed position must read as null.
    for (int rowId : new int[] {0, 42}) {
      Assert.assertTrue(column.isNullAt(rowId));
    }

    // The accessor itself hands back nulls rather than failing.
    Assert.assertNull(nullAccessor.getLong(0));
    Assert.assertNull(nullAccessor.getUTF8String(0));
  }
}
Loading