Skip to content

Commit c83c6b2

Browse files
authored
[avro] Fix Avro decoder bitmap corruption when nullable field decoding fails (#8213)
# Which issue does this PR close? - Closes #8212 # Rationale for this change In the original code, the bitmap was modified before decoding. Even if decoding fails, the null buffer was modified, leading to bitmap corruption, eventually causing flush to fail. # What changes are included in this PR? This PR fixes the bug where the bitmap was modified before decoding. If there is decoding failure, the bitmap should not be modified but the decode method should be exited gracefully without any side effect. # Are these changes tested? - Added a unit test # Are there any user-facing changes? No.
1 parent 32b385b commit c83c6b2

File tree

1 file changed

+39
-1
lines changed

1 file changed

+39
-1
lines changed

arrow-avro/src/reader/record.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,11 +484,13 @@ impl Decoder {
484484
Nullability::NullFirst => branch != 0,
485485
Nullability::NullSecond => branch == 0,
486486
};
487-
nb.append(is_not_null);
488487
if is_not_null {
488+
// It is mportant to decode before appending to null buffer in case of decode error
489489
encoding.decode(buf)?;
490+
nb.append(true);
490491
} else {
491492
encoding.append_null();
493+
nb.append(false);
492494
}
493495
}
494496
}
@@ -1433,4 +1435,40 @@ mod tests {
14331435
let array = decoder.flush(None).unwrap();
14341436
assert_eq!(array.len(), 0);
14351437
}
1438+
1439+
#[test]
1440+
fn test_nullable_decode_error_bitmap_corruption() {
1441+
// Nullable Int32 with ['T','null'] encoding (NullSecond)
1442+
let avro_type = AvroDataType::new(
1443+
Codec::Int32,
1444+
Default::default(),
1445+
Some(Nullability::NullSecond),
1446+
);
1447+
let mut decoder = Decoder::try_new(&avro_type).unwrap();
1448+
1449+
// Row 1: union branch 1 (null)
1450+
let mut row1 = Vec::new();
1451+
row1.extend_from_slice(&encode_avro_int(1));
1452+
1453+
// Row 2: union branch 0 (non-null) but missing the int payload -> decode error
1454+
let mut row2 = Vec::new();
1455+
row2.extend_from_slice(&encode_avro_int(0)); // branch = 0 => non-null
1456+
1457+
// Row 3: union branch 0 (non-null) with correct int payload -> should succeed
1458+
let mut row3 = Vec::new();
1459+
row3.extend_from_slice(&encode_avro_int(0)); // branch
1460+
row3.extend_from_slice(&encode_avro_int(42)); // actual value
1461+
1462+
decoder.decode(&mut AvroCursor::new(&row1)).unwrap();
1463+
assert!(decoder.decode(&mut AvroCursor::new(&row2)).is_err()); // decode error
1464+
decoder.decode(&mut AvroCursor::new(&row3)).unwrap();
1465+
1466+
let array = decoder.flush(None).unwrap();
1467+
1468+
// Should contain 2 elements: row1 (null) and row3 (42)
1469+
assert_eq!(array.len(), 2);
1470+
let int_array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1471+
assert!(int_array.is_null(0)); // row1 is null
1472+
assert_eq!(int_array.value(1), 42); // row3 value is 42
1473+
}
14361474
}

0 commit comments

Comments
 (0)