From fca95e636d5798c808401a143207a1913c5fcc7f Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 11 Sep 2025 13:50:53 -0400
Subject: [PATCH 1/8] Update parquet-testing pin to `master`

---
 parquet-testing | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parquet-testing b/parquet-testing
index 5cbfc43d488c..a3d96a65e11e 160000
--- a/parquet-testing
+++ b/parquet-testing
@@ -1 +1 @@
-Subproject commit 5cbfc43d488c9c8404a1a7088cca400ae095b831
+Subproject commit a3d96a65e11e2bbca7d22a894e8313ede90a33a3

From a2ce848e660885585aa4b8f92eec119ffecdab22 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 11 Sep 2025 14:23:49 -0400
Subject: [PATCH 2/8] Update variant_integration tests to read and use the new
 shredding support

---
 parquet-variant/src/variant/metadata.rs |    8 +-
 parquet/Cargo.toml                      |    5 +
 parquet/tests/variant_integration.rs    | 1580 ++++++-----------------
 3 files changed, 421 insertions(+), 1172 deletions(-)

diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs
index 1c9da6bcc0cf..14d048631de7 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -130,6 +130,7 @@ impl VariantMetadataHeader {
 /// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
 #[derive(Debug, Clone, PartialEq)]
 pub struct VariantMetadata<'m> {
+    /// (Only) the bytes that make up this metadata instance.
     pub(crate) bytes: &'m [u8],
     header: VariantMetadataHeader,
     dictionary_size: u32,
@@ -299,7 +300,7 @@ impl<'m> VariantMetadata<'m> {
         self.header.version
     }
 
-    /// Gets an offset array entry by index.
+    /// Gets an offset into the dictionary entry by index.
     ///
     /// This offset is an index into the dictionary, at the boundary between string `i-1` and string
     /// `i`. See [`Self::get`] to retrieve a specific dictionary entry.
@@ -309,6 +310,11 @@ impl<'m> VariantMetadata<'m> {
         self.header.offset_size.unpack_u32(bytes, i)
     }
 
+    /// Returns the last byte offset of the metadata dictionary, which is also the total size of
+    pub fn size(&self) -> usize {
+        self.bytes.len()
+    }
+
     /// Attempts to retrieve a dictionary entry by index, failing if out of bounds or if the
     /// underlying bytes are [invalid].
     ///
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index a39275fb254e..5dbd4b5b39dd 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -171,6 +171,11 @@ name = "encryption"
 required-features = ["arrow"]
 path = "./tests/encryption/mod.rs"
 
+[[test]]
+name = "variant_integration"
+required-features = ["arrow", "variant_experimental", "serde"]
+path = "./tests/variant_integration.rs"
+
 [[bin]]
 name = "parquet-read"
 required-features = ["cli"]
diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index e379b820f29f..fe863462a660 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -21,1233 +21,471 @@
 //! Variant values from .variant.bin files, reads Parquet files, converts StructArray
 //! to VariantArray, and verifies that extracted values match expected results.
 //!
-//! Based on the parquet-testing PR: https://github.com/apache/parquet-testing/pull/90/files
-//! Inspired by the arrow-go implementation: https://github.com/apache/arrow-go/pull/455/files
+//! Inspired by the arrow-go implementation: <https://github.com/apache/arrow-go/pull/455/files>
 
-// These tests require the arrow feature
-#![cfg(feature = "arrow")]
-
-use arrow_array::{Array, StructArray};
+use arrow::util::test_util::parquet_test_data;
+use arrow_array::{Array, ArrayRef};
+use arrow_cast::cast;
+use arrow_schema::{DataType, Fields};
 use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
-use std::{
-    env,
-    error::Error,
-    fs,
-    path::{Path, PathBuf},
-};
+use parquet_variant::{Variant, VariantMetadata};
+use parquet_variant_compute::VariantArray;
+use serde::Deserialize;
+use std::path::Path;
+use std::sync::{Arc, LazyLock};
+use std::{fs, path::PathBuf};
+
+type Result<T> = std::result::Result<T, String>;
+
+/// Creates a test function for a given case number
+///
+/// Note the index is zero-based, while the case number is one-based
+macro_rules! variant_test_case {
+    ($case_num:literal) => {
+        paste::paste! {
+            #[test]
+            fn [<test_variant_integration_case_ $case_num>]() {
+                all_cases()[$case_num - 1].run()
+            }
+        }
+    };
+
+    // Generates an error test case, where the expected result is an error message
+    ($case_num:literal, $expected_error:literal) => {
+        paste::paste! {
+            #[test]
+            #[should_panic(expected = $expected_error)]
+            fn [<test_variant_integration_case_ $case_num>]() {
+                all_cases()[$case_num - 1].run()
+            }
+        }
+    };
+}
 
-/// Test case definition structure matching the format from cases.json
-#[derive(Debug, Clone)]
+// Generate test functions for each case
+// Note that case 3 is empty in cases.json, so we skip it here
+// Only cases 40, 42, 87, 127 and 128 are expected to fail, the rest should pass
+
+variant_test_case!(1, "Unsupported typed_value type: List(");
+variant_test_case!(2, "Unsupported typed_value type: List(");
+// case 3 is empty in cases.json 🤷
+// ```json
+// {
+//   "case_number" : 3
+// },
+// ```
+variant_test_case!(3, "parquet_file must be set");
+variant_test_case!(4, "Unsupported typed_value type: Boolean");
+variant_test_case!(5, "Unsupported typed_value type: Boolean");
+variant_test_case!(6);
+variant_test_case!(7);
+variant_test_case!(8);
+variant_test_case!(9);
+variant_test_case!(10);
+variant_test_case!(11);
+variant_test_case!(12);
+variant_test_case!(13);
+variant_test_case!(14);
+variant_test_case!(15);
+variant_test_case!(16);
+variant_test_case!(17);
+variant_test_case!(18, "Unsupported typed_value type: Date32");
+variant_test_case!(19, "Unsupported typed_value type: Date32");
+variant_test_case!(
+    20,
+    "Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
+);
+variant_test_case!(
+    21,
+    "Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
+);
+variant_test_case!(
+    22,
+    "Unsupported typed_value type: Timestamp(Microsecond, None)"
+);
+variant_test_case!(
+    23,
+    "Unsupported typed_value type: Timestamp(Microsecond, None)"
+);
+variant_test_case!(24, "Unsupported typed_value type: Decimal128(9, 4)");
+variant_test_case!(25, "Unsupported typed_value type: Decimal128(9, 4)");
+variant_test_case!(26, "Unsupported typed_value type: Decimal128(18, 9)");
+variant_test_case!(27, "Unsupported typed_value type: Decimal128(18, 9)");
+variant_test_case!(28, "Unsupported typed_value type: Decimal128(38, 9)");
+variant_test_case!(29, "Unsupported typed_value type: Decimal128(38, 9)");
+variant_test_case!(30, "Unsupported typed_value type: BinaryView");
+variant_test_case!(31, "Unsupported typed_value type: Utf8");
+variant_test_case!(32, "Unsupported typed_value type: Time64(Microsecond)");
+variant_test_case!(
+    33,
+    "Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
+);
+variant_test_case!(
+    34,
+    "Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
+);
+variant_test_case!(
+    35,
+    "Unsupported typed_value type: Timestamp(Nanosecond, None)"
+);
+variant_test_case!(
+    36,
+    "Unsupported typed_value type: Timestamp(Nanosecond, None)"
+);
+variant_test_case!(37, "Unsupported typed_value type: FixedSizeBinary(16)");
+variant_test_case!(38, "Unsupported typed_value type: Struct(");
+variant_test_case!(39);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(40, "Unsupported typed_value type: List(");
+variant_test_case!(41, "Unsupported typed_value type: List(Field");
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(
+    42,
+    "Expected an error 'Invalid variant, conflicting value and typed_value`, but got no error"
+);
+variant_test_case!(43, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(44, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(45, "Unsupported typed_value type: List(Field");
+variant_test_case!(46, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(47);
+variant_test_case!(48);
+variant_test_case!(49);
+variant_test_case!(50);
+variant_test_case!(51);
+variant_test_case!(52);
+variant_test_case!(53);
+variant_test_case!(54);
+variant_test_case!(55);
+variant_test_case!(56);
+variant_test_case!(57);
+variant_test_case!(58);
+variant_test_case!(59);
+variant_test_case!(60);
+variant_test_case!(61);
+variant_test_case!(62);
+variant_test_case!(63);
+variant_test_case!(64);
+variant_test_case!(65);
+variant_test_case!(66);
+variant_test_case!(67);
+variant_test_case!(68);
+variant_test_case!(69);
+variant_test_case!(70);
+variant_test_case!(71);
+variant_test_case!(72);
+variant_test_case!(73);
+variant_test_case!(74);
+variant_test_case!(75);
+variant_test_case!(76);
+variant_test_case!(77);
+variant_test_case!(78);
+variant_test_case!(79);
+variant_test_case!(80);
+variant_test_case!(81);
+variant_test_case!(82);
+variant_test_case!(83, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(84, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(85, "Unsupported typed_value type: List(Field");
+variant_test_case!(86, "Unsupported typed_value type: List(Field");
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(87, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(88, "Unsupported typed_value type: List(Field");
+variant_test_case!(89);
+variant_test_case!(90);
+variant_test_case!(91);
+variant_test_case!(92);
+variant_test_case!(93);
+variant_test_case!(94);
+variant_test_case!(95);
+variant_test_case!(96);
+variant_test_case!(97);
+variant_test_case!(98);
+variant_test_case!(99);
+variant_test_case!(100);
+variant_test_case!(101);
+variant_test_case!(102);
+variant_test_case!(103);
+variant_test_case!(104);
+variant_test_case!(105);
+variant_test_case!(106);
+variant_test_case!(107);
+variant_test_case!(108);
+variant_test_case!(109);
+variant_test_case!(110);
+variant_test_case!(111);
+variant_test_case!(112);
+variant_test_case!(113);
+variant_test_case!(114);
+variant_test_case!(115);
+variant_test_case!(116);
+variant_test_case!(117);
+variant_test_case!(118);
+variant_test_case!(119);
+variant_test_case!(120);
+variant_test_case!(121);
+variant_test_case!(122);
+variant_test_case!(123);
+variant_test_case!(124);
+variant_test_case!(125, "Unsupported typed_value type: Struct");
+variant_test_case!(126, "Unsupported typed_value type: List(");
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(
+    127,
+    "Invalid variant data: InvalidArgumentError(\"Received empty bytes\")"
+);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(128, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(129, "Invalid variant data: InvalidArgumentError(");
+variant_test_case!(130, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(131);
+variant_test_case!(132, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(133, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(134, "Unsupported typed_value type: Struct([Field");
+variant_test_case!(135);
+variant_test_case!(136, "Unsupported typed_value type: List(Field ");
+variant_test_case!(137, "Invalid variant data: InvalidArgumentError(");
+variant_test_case!(138, "Unsupported typed_value type: Struct([Field");
+
+/// Test case definition structure matching the format from
+/// `parquet-testing/parquet_shredded/cases.json`
+///
+/// See [README] for details.
+///
+/// [README]: https://github.com/apache/parquet-testing/blob/master/shredded_variant/README.md
+///
+/// Example JSON
+/// ```json
+/// {
+///   "case_number" : 5,
+///   "test" : "testShreddedVariantPrimitives",
+///   "parquet_file" : "case-005.parquet",
+///   "variant_file" : "case-005_row-0.variant.bin",
+///   "variant" : "Variant(metadata=VariantMetadata(dict={}), value=Variant(type=BOOLEAN_FALSE, value=false))"
+/// },
+/// ```
+#[allow(dead_code)] // some fields are not used except when printing the struct
+#[derive(Debug, Clone, Deserialize)]
 struct VariantTestCase {
-    /// Case number (e.g., 1, 2, 4, etc. - note: case 3 is missing)
+    /// Case number (e.g., 1, 2, 4, etc. - note: case 3 is missing any data)
     pub case_number: u32,
     /// Test method name (e.g., "testSimpleArray")
     pub test: Option<String>,
     /// Name of the parquet file (e.g., "case-001.parquet")
-    pub parquet_file: String,
+    pub parquet_file: Option<String>,
+
     /// Expected variant binary file (e.g., "case-001_row-0.variant.bin") - None for error cases
     pub variant_file: Option<String>,
+    /// Multiple expected variant binary files, for multi row inputs. If there
+    /// is no variant, there is no file
+    pub variant_files: Option<Vec<Option<String>>>,
     /// Expected error message for negative test cases
+    ///
+    /// (this is the message from the cases.json file, which is from the Iceberg
+    /// implementation, so it is not guaranteed to match the actual Rust error message)
     pub error_message: Option<String>,
     /// Description of the variant value (for debugging)
     pub variant_description: Option<String>,
-    /// Whether this test is currently expected to pass
-    pub enabled: bool,
-    /// Test category for grouping and analysis
-    pub test_category: TestCategory,
-}
-
-/// Categories of variant tests for organized validation
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-enum TestCategory {
-    /// Basic primitive type tests
-    Primitives,
-    /// Array-related tests (simple, nested, with errors)
-    Arrays,
-    /// Object-related tests (shredded, partial, with errors)
-    Objects,
-    /// Tests expecting specific error conditions
-    ErrorHandling,
-    /// Schema validation and unshredded variants
-    SchemaValidation,
-    /// Mixed and complex scenarios
-    Complex,
-}
-
-/// Comprehensive test harness for Parquet Variant integration
-struct VariantIntegrationHarness {
-    /// Directory containing shredded_variant test data
-    test_data_dir: PathBuf,
-    /// Parsed test cases from cases.json
-    test_cases: Vec<VariantTestCase>,
 }
 
-impl VariantIntegrationHarness {
-    /// Create a new integration test harness
-    fn new() -> Result<Self, Box<dyn Error>> {
-        let test_data_dir = find_shredded_variant_test_data()?;
-        let test_cases = load_test_cases(&test_data_dir)?;
-
-        println!(
-            "Loaded {} test cases from {}",
-            test_cases.len(),
-            test_data_dir.display()
-        );
-
-        Ok(Self {
-            test_data_dir,
-            test_cases,
-        })
-    }
-
-    /// Run all integration tests
-    fn run_all_tests(&self) -> Result<(), Box<dyn Error>> {
-        println!("Running Parquet Variant Integration Tests");
-        println!("==========================================");
+/// Run a single test case
+impl VariantTestCase {
+    /// Run a test case. Panics on unexpected error
+    fn run(&self) {
+        println!("{self:#?}");
 
-        let mut passed = 0;
-        let mut failed = 0;
-        let mut ignored = 0;
+        let variant_data = self.load_variants();
+        let variant_array = self.load_parquet();
 
-        for test_case in &self.test_cases {
-            if !test_case.enabled {
-                println!(
-                    "IGNORED: case-{:03} - {}",
-                    test_case.case_number,
-                    test_case.test.as_deref().unwrap_or("unknown test")
-                );
-                ignored += 1;
-                continue;
-            }
-
-            match self.run_single_test(test_case) {
-                Ok(()) => {
-                    println!(
-                        "PASSED: case-{:03} - {}",
-                        test_case.case_number,
-                        test_case.test.as_deref().unwrap_or("unknown test")
-                    );
-                    passed += 1;
-                }
-                Err(e) => {
-                    println!(
-                        "FAILED: case-{:03} - {} - Error: {}",
-                        test_case.case_number,
-                        test_case.test.as_deref().unwrap_or("unknown test"),
-                        e
-                    );
-                    failed += 1;
-                }
-            }
-        }
-
-        println!("\nTest Results:");
-        println!("  Passed: {}", passed);
-        println!("  Failed: {}", failed);
-        println!("  Ignored: {}", ignored);
-        println!("  Total: {}", passed + failed + ignored);
-
-        if failed > 0 {
-            Err(format!("{} tests failed", failed).into())
-        } else {
-            Ok(())
-        }
-    }
-
-    /// Run a single test case
-    fn run_single_test(&self, test_case: &VariantTestCase) -> Result<(), Box<dyn Error>> {
-        match &test_case.test_category {
-            TestCategory::ErrorHandling => {
-                // For error cases, we expect the parsing/validation to fail
-                self.run_error_test(test_case)
-            }
-            _ => {
-                // For normal cases, run standard validation
-                self.run_success_test(test_case)
+        // if this is an error case, the expected error message should be set
+        if let Some(expected_error) = &self.error_message {
+            // just accessing the variant_array should trigger the error
+            for i in 0..variant_array.len() {
+                let _ = variant_array.value(i);
             }
+            panic!("Expected an error '{expected_error}`, but got no error");
         }
-    }
-
-    /// Run a test case that should succeed
-    fn run_success_test(&self, test_case: &VariantTestCase) -> Result<(), Box<dyn Error>> {
-        // Step 1: Load expected Variant data from .variant.bin file (if present)
-        let expected_variant_data = if let Some(variant_file) = &test_case.variant_file {
-            Some(self.load_expected_variant_data_by_file(variant_file)?)
-        } else {
-            None
-        };
 
-        // Step 2: Read Parquet file and extract StructArray
-        let struct_arrays = self.read_parquet_file(test_case)?;
-
-        // Step 3: For now, just verify the structure and basic validation
-        // TODO: Convert StructArray to VariantArray using cast_to_variant (requires variant crates)
-        // TODO: Extract values using both VariantArray::value() and variant_get kernel
-        // TODO: Compare extracted values with expected values
-
-        self.verify_variant_structure(&struct_arrays)?;
-
-        println!(
-            "  {} validation passed for case-{:03}",
-            match test_case.test_category {
-                TestCategory::Primitives => "Primitive type",
-                TestCategory::Arrays => "Array structure",
-                TestCategory::Objects => "Object structure",
-                TestCategory::SchemaValidation => "Schema",
-                TestCategory::Complex => "Complex structure",
-                _ => "Basic structure",
-            },
-            test_case.case_number
+        assert_eq!(
+            variant_array.len(),
+            variant_data.len(),
+            "Number of variants in parquet file does not match expected number"
         );
-
-        if let Some(data) = expected_variant_data {
-            println!("    Expected variant data: {} bytes", data.len());
-        }
-        println!(
-            "    Found {} StructArray(s) with variant structure",
-            struct_arrays.len()
-        );
-
-        Ok(())
-    }
-
-    /// Run a test case that should produce an error
-    fn run_error_test(&self, test_case: &VariantTestCase) -> Result<(), Box<dyn Error>> {
-        println!("  Testing error case for case-{:03}", test_case.case_number);
-
-        // Try to read the parquet file - this might fail as expected
-        match self.read_parquet_file(test_case) {
-            Ok(struct_arrays) => {
-                // If file reading succeeds, the error should come during variant processing
-                println!(
-                    "    Parquet file read successfully, expecting error during variant processing"
+        for (i, expected) in variant_data.iter().enumerate() {
+            if variant_array.is_null(i) {
+                assert!(
+                    expected.is_none(),
+                    "Expected null variant at index {i}, but got {:?}",
+                    variant_array.value(i)
                 );
-                println!("    Found {} StructArray(s)", struct_arrays.len());
-
-                // TODO: When variant processing is implemented, capture and validate the error
-                if let Some(expected_error) = &test_case.error_message {
-                    println!("    Expected error: {}", expected_error);
-                }
-            }
-            Err(e) => {
-                // File reading failed - check if this matches expected error
-                println!("    Parquet file reading failed: {}", e);
-                if let Some(expected_error) = &test_case.error_message {
-                    println!("    Expected error: {}", expected_error);
-                    // TODO: Match actual error against expected error pattern
-                }
+                continue;
             }
-        }
+            let actual = variant_array.value(i);
+            let expected = variant_data[i]
+                .as_ref()
+                .expect("Expected non-null variant data");
 
-        Ok(())
-    }
+            let expected = expected.as_variant();
 
-    /// Load expected Variant binary data from .variant.bin file
-    #[allow(dead_code)]
-    fn load_expected_variant_data(
-        &self,
-        test_case: &VariantTestCase,
-    ) -> Result<Vec<u8>, Box<dyn Error>> {
-        if let Some(variant_file) = &test_case.variant_file {
-            self.load_expected_variant_data_by_file(variant_file)
-        } else {
-            Err("No variant file specified for this test case".into())
+            // compare the variants (is this the right way to compare?)
+            assert_eq!(actual, expected, "Variant data mismatch at index {}\n\nactual\n{actual:#?}\n\nexpected\n{expected:#?}", i);
         }
     }
 
-    /// Load expected Variant binary data by file name
-    fn load_expected_variant_data_by_file(
-        &self,
-        variant_file: &str,
-    ) -> Result<Vec<u8>, Box<dyn Error>> {
-        let variant_path = self.test_data_dir.join(variant_file);
-
-        if !variant_path.exists() {
-            return Err(format!("Variant file not found: {}", variant_path.display()).into());
-        }
-
-        let data = fs::read(&variant_path)?;
-        Ok(data)
+    /// Parses the expected variant files, returning a vector of `ExpectedVariant` or None
+    /// if the corresponding entry in `variant_files` is null
+    fn load_variants(&self) -> Vec<Option<ExpectedVariant>> {
+        let variant_files: Box<dyn Iterator<Item = Option<&String>>> =
+            match (&self.variant_files, &self.variant_file) {
+                (Some(files), None) => Box::new(files.iter().map(|f| f.as_ref())),
+                (None, Some(file)) => Box::new(std::iter::once(Some(file))),
+                // error cases may not have any variant files
+                _ => Box::new(std::iter::empty()),
+            };
+
+        // load each file
+        variant_files
+            .map(|f| {
+                let v = ExpectedVariant::try_load(&TEST_CASE_DIR.join(f?))
+                    .expect("Failed to load expected variant");
+                Some(v)
+            })
+            .collect()
     }
 
-    /// Read Parquet file and extract StructArray columns
-    fn read_parquet_file(
-        &self,
-        test_case: &VariantTestCase,
-    ) -> Result<Vec<StructArray>, Box<dyn Error>> {
-        let parquet_path = self.test_data_dir.join(&test_case.parquet_file);
-
-        if !parquet_path.exists() {
-            return Err(format!("Parquet file not found: {}", parquet_path.display()).into());
+    /// Load the parquet file, extract the Variant column, and return as a VariantArray
+    fn load_parquet(&self) -> VariantArray {
+        let parquet_file = self
+            .parquet_file
+            .as_ref()
+            .expect("parquet_file must be set");
+        let path = TEST_CASE_DIR.join(parquet_file);
+        let file = fs::File::open(&path)
+            .unwrap_or_else(|e| panic!("cannot open parquet file {path:?}: {e}"));
+
+        let reader = ParquetRecordBatchReaderBuilder::try_new(file)
+            .and_then(|b| b.build())
+            .unwrap_or_else(|e| panic!("Error reading parquet reader for {path:?}: {e}"));
+
+        let mut batches: Vec<_> = reader
+            .collect::<std::result::Result<_, _>>()
+            .unwrap_or_else(|e| panic!("Error reading parquet batches for {path:?}: {e}"));
+
+        if batches.is_empty() {
+            panic!("No parquet batches were found in file {path:?}");
         }
-
-        let file = fs::File::open(&parquet_path)?;
-        let builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
-        let reader = builder.build()?;
-
-        let mut struct_arrays = Vec::new();
-
-        for batch_result in reader {
-            let batch = batch_result?;
-
-            // Look for StructArray columns that could contain Variant data
-            for column in batch.columns() {
-                if let Some(struct_array) = column.as_any().downcast_ref::<StructArray>() {
-                    // Check if this StructArray has the expected Variant structure
-                    if self.is_variant_struct_array(struct_array)? {
-                        struct_arrays.push(struct_array.clone());
-                    }
-                }
-            }
-        }
-
-        if struct_arrays.is_empty() {
-            return Err("No valid Variant StructArray columns found in Parquet file".into());
-        }
-
-        Ok(struct_arrays)
-    }
-
-    /// Check if a StructArray has the expected Variant structure (metadata, value fields)
-    fn is_variant_struct_array(&self, struct_array: &StructArray) -> Result<bool, Box<dyn Error>> {
-        let column_names = struct_array.column_names();
-        let field_names: Vec<&str> = column_names.to_vec();
-
-        // Check for required Variant fields
-        let has_metadata = field_names.contains(&"metadata");
-        let has_value = field_names.contains(&"value");
-
-        Ok(has_metadata && has_value)
-    }
-
-    /// Verify that StructArrays have the expected Variant structure
-    fn verify_variant_structure(
-        &self,
-        struct_arrays: &[StructArray],
-    ) -> Result<(), Box<dyn Error>> {
-        for (i, struct_array) in struct_arrays.iter().enumerate() {
-            if !self.is_variant_struct_array(struct_array)? {
-                return Err(
-                    format!("StructArray {} does not have expected Variant structure", i).into(),
-                );
-            }
-
-            println!(
-                "    StructArray {} has {} rows and valid Variant structure",
-                i,
-                struct_array.len()
+        if batches.len() > 1 {
+            panic!(
+                "Multiple parquet batches were found in file {path:?}, only single batch supported"
             );
         }
-
-        Ok(())
-    }
-}
-
-/// Find the shredded_variant test data directory
-fn find_shredded_variant_test_data() -> Result<PathBuf, Box<dyn Error>> {
-    // Try environment variable first
-    if let Ok(dir) = env::var("PARQUET_TEST_DATA") {
-        let shredded_variant_dir = PathBuf::from(dir).join("shredded_variant");
-        if shredded_variant_dir.is_dir() {
-            return Ok(shredded_variant_dir);
-        }
-    }
-
-    // Try relative paths from CARGO_MANIFEST_DIR
-    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string());
-    let candidates = vec![
-        PathBuf::from(&manifest_dir).join("../parquet-testing/shredded_variant"),
-        PathBuf::from(&manifest_dir).join("parquet-testing/shredded_variant"),
-        PathBuf::from("parquet-testing/shredded_variant"),
-    ];
-
-    for candidate in candidates {
-        if candidate.is_dir() {
-            return Ok(candidate);
-        }
-    }
-
-    Err("Could not find shredded_variant test data directory. Ensure parquet-testing submodule is initialized with PR #90 data.".into())
-}
-
-/// Load test cases from cases.json
-fn load_test_cases(test_data_dir: &Path) -> Result<Vec<VariantTestCase>, Box<dyn Error>> {
-    let cases_file = test_data_dir.join("cases.json");
-
-    if !cases_file.exists() {
-        return Err(format!("cases.json not found at {}", cases_file.display()).into());
-    }
-
-    let content = fs::read_to_string(&cases_file)?;
-
-    // Parse JSON manually since serde is not available as a dependency
-    parse_cases_json(&content)
-}
-
-/// Parse cases.json manually without serde
-fn parse_cases_json(content: &str) -> Result<Vec<VariantTestCase>, Box<dyn Error>> {
-    let mut test_cases = Vec::new();
-
-    // Simple JSON parsing for the specific format we expect
-    // Format: [{"case_number": 1, "test": "...", "parquet_file": "...", "variant_file": "...", "variant": "..."}, ...]
-
-    let lines: Vec<&str> = content.lines().collect();
-    let mut current_case: Option<VariantTestCase> = None;
-
-    for line in lines {
-        let trimmed = line.trim();
-
-        if trimmed.contains("\"case_number\"") {
-            // Extract case number
-            if let Some(colon_pos) = trimmed.find(':') {
-                let number_part = &trimmed[colon_pos + 1..];
-                if let Some(comma_pos) = number_part.find(',') {
-                    let number_str = number_part[..comma_pos].trim();
-                    if let Ok(case_number) = number_str.parse::<u32>() {
-                        current_case = Some(VariantTestCase {
-                            case_number,
-                            test: None,
-                            parquet_file: String::new(),
-                            variant_file: None,
-                            error_message: None,
-                            variant_description: None,
-                            enabled: false, // Start disabled, enable progressively
-                            test_category: TestCategory::Primitives, // Default, will be updated
-                        });
-                    }
-                }
-            }
-        } else if trimmed.contains("\"test\"") && current_case.is_some() {
-            // Extract test name
-            if let Some(case) = current_case.as_mut() {
-                if let Some(start) = trimmed.find("\"test\"") {
-                    let after_test = &trimmed[start + 6..];
-                    if let Some(colon_pos) = after_test.find(':') {
-                        let value_part = &after_test[colon_pos + 1..].trim();
-                        if let Some(start_quote) = value_part.find('"') {
-                            let after_quote = &value_part[start_quote + 1..];
-                            if let Some(end_quote) = after_quote.find('"') {
-                                case.test = Some(after_quote[..end_quote].to_string());
-                            }
-                        }
-                    }
-                }
-            }
-        } else if trimmed.contains("\"parquet_file\"") && current_case.is_some() {
-            // Extract parquet file name
-            if let Some(case) = current_case.as_mut() {
-                if let Some(start_quote) = trimmed.rfind('"') {
-                    let before_quote = &trimmed[..start_quote];
-                    if let Some(second_quote) = before_quote.rfind('"') {
-                        case.parquet_file = before_quote[second_quote + 1..].to_string();
-                    }
-                }
-            }
-        } else if trimmed.contains("\"variant_file\"") && current_case.is_some() {
-            // Extract variant file name
-            if let Some(case) = current_case.as_mut() {
-                if let Some(start_quote) = trimmed.rfind('"') {
-                    let before_quote = &trimmed[..start_quote];
-                    if let Some(second_quote) = before_quote.rfind('"') {
-                        case.variant_file = Some(before_quote[second_quote + 1..].to_string());
-                    }
-                }
-            }
-        } else if trimmed.contains("\"error_message\"") && current_case.is_some() {
-            // Extract error message for negative test cases
-            if let Some(case) = current_case.as_mut() {
-                if let Some(start_quote) = trimmed.rfind('"') {
-                    let before_quote = &trimmed[..start_quote];
-                    if let Some(second_quote) = before_quote.rfind('"') {
-                        case.error_message = Some(before_quote[second_quote + 1..].to_string());
-                        case.test_category = TestCategory::ErrorHandling;
-                    }
-                }
-            }
-        } else if trimmed.contains("\"variant\"") && current_case.is_some() {
-            // Extract variant description
-            if let Some(case) = current_case.as_mut() {
-                if let Some(start_quote) = trimmed.rfind('"') {
-                    let before_quote = &trimmed[..start_quote];
-                    if let Some(second_quote) = before_quote.rfind('"') {
-                        case.variant_description =
-                            Some(before_quote[second_quote + 1..].to_string());
-                    }
-                }
-            }
-        } else if trimmed == "}, {" || trimmed == "}" {
-            // End of current case
-            if let Some(mut case) = current_case.take() {
-                if !case.parquet_file.is_empty()
-                    && (case.variant_file.is_some() || case.error_message.is_some())
-                {
-                    // Categorize the test based on its name if not already categorized
-                    if case.test_category == TestCategory::Primitives
-                        && case.error_message.is_none()
-                    {
-                        case.test_category = categorize_test(&case.test);
-                    }
-                    test_cases.push(case);
-                }
-            }
-        }
-    }
-
-    // Handle the last case if the JSON doesn't end with }, {
-    if let Some(mut case) = current_case {
-        if !case.parquet_file.is_empty()
-            && (case.variant_file.is_some() || case.error_message.is_some())
-        {
-            // Categorize the test based on its name if not already categorized
-            if case.test_category == TestCategory::Primitives && case.error_message.is_none() {
-                case.test_category = categorize_test(&case.test);
-            }
-            test_cases.push(case);
-        }
-    }
-
-    Ok(test_cases)
-}
-
-/// Categorize a test based on its test method name
-fn categorize_test(test_name: &Option<String>) -> TestCategory {
-    match test_name.as_ref().map(|s| s.as_str()) {
-        Some(name) if name.contains("Array") => TestCategory::Arrays,
-        Some(name) if name.contains("Object") => TestCategory::Objects,
-        Some(name) if name.contains("Unshredded") => TestCategory::SchemaValidation,
-        Some(name) if name.contains("Mixed") || name.contains("Nested") => TestCategory::Complex,
-        Some(name) if name.contains("Primitives") => TestCategory::Primitives,
-        _ => TestCategory::Primitives, // Default fallback
-    }
-}
-
-// Individual test functions with #[ignore] for progressive enablement
-// Following the exact pattern from the PR description
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_001() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 1)
-        .expect("case-001 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-001 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_002() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 2)
-        .expect("case-002 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-002 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_004() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 4)
-        .expect("case-004 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-004 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_005() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 5)
-        .expect("case-005 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-005 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_006() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 6)
-        .expect("case-006 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-006 should pass");
-}
-
-// Add more individual test cases for key scenarios
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_007() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 7)
-        .expect("case-007 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-007 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_008() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 8)
-        .expect("case-008 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-008 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_009() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 9)
-        .expect("case-009 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-009 should pass");
-}
-
-#[test]
-#[ignore] // Enable once parquet-variant dependencies are added
-fn test_variant_integration_case_010() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 10)
-        .expect("case-010 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("case-010 should pass");
-}
-
-// Specific tests for error cases that should be enabled to test error handling
-#[test]
-#[ignore] // Enable to test error handling - case with conflicting value and typed_value
-fn test_variant_integration_error_case_040() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 40)
-        .expect("case-040 not found");
-
-    // This should handle the error gracefully
-    harness
-        .run_single_test(test_case)
-        .expect("Error case should be handled gracefully");
-}
-
-#[test]
-#[ignore] // Enable to test error handling - case with value and typed_value conflict
-fn test_variant_integration_error_case_042() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let test_case = harness
-        .test_cases
-        .iter()
-        .find(|case| case.case_number == 42)
-        .expect("case-042 not found");
-
-    harness
-        .run_single_test(test_case)
-        .expect("Error case should be handled gracefully");
-}
-
-// Test that runs all cases by category
-#[test]
-#[ignore] // Enable when ready to run all tests
-fn test_variant_integration_all_cases() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-    harness
-        .run_all_tests()
-        .expect("Integration tests should pass");
-}
-
-#[test]
-#[ignore] // Enable to test primitive type cases
-fn test_variant_integration_primitives_only() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let primitive_cases: Vec<_> = harness
-        .test_cases
-        .iter()
-        .filter(|case| case.test_category == TestCategory::Primitives)
-        .collect();
-
-    println!("Testing {} primitive cases", primitive_cases.len());
-
-    let mut passed = 0;
-    let mut failed = 0;
-
-    for test_case in primitive_cases {
-        match harness.run_single_test(test_case) {
-            Ok(()) => {
-                println!("PASSED: case-{:03}", test_case.case_number);
-                passed += 1;
-            }
-            Err(e) => {
-                println!("FAILED: case-{:03} - {}", test_case.case_number, e);
-                failed += 1;
-            }
-        }
-    }
-
-    println!("Primitive tests: {} passed, {} failed", passed, failed);
-    assert!(failed == 0, "All primitive tests should pass");
-}
-
-#[test]
-#[ignore] // Enable to test array cases
-fn test_variant_integration_arrays_only() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let array_cases: Vec<_> = harness
-        .test_cases
-        .iter()
-        .filter(|case| case.test_category == TestCategory::Arrays)
-        .collect();
-
-    println!("Testing {} array cases", array_cases.len());
-
-    for test_case in array_cases {
-        println!(
-            "Testing case-{:03}: {}",
-            test_case.case_number,
-            test_case.test.as_deref().unwrap_or("unknown")
-        );
-        match harness.run_single_test(test_case) {
-            Ok(()) => println!("  PASSED"),
-            Err(e) => println!("  FAILED: {}", e),
-        }
-    }
-}
-
-#[test]
-#[ignore] // Enable to test object cases
-fn test_variant_integration_objects_only() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let object_cases: Vec<_> = harness
-        .test_cases
-        .iter()
-        .filter(|case| case.test_category == TestCategory::Objects)
-        .collect();
-
-    println!("Testing {} object cases", object_cases.len());
-
-    for test_case in object_cases {
-        println!(
-            "Testing case-{:03}: {}",
-            test_case.case_number,
-            test_case.test.as_deref().unwrap_or("unknown")
-        );
-        match harness.run_single_test(test_case) {
-            Ok(()) => println!("  PASSED"),
-            Err(e) => println!("  FAILED: {}", e),
-        }
+        let batch = batches.swap_remove(0);
+
+        // The schema is "id", "var" for the id and variant columns
+        // TODO: support the actual parquet logical type annotation somehow
+        let var = batch
+            .column_by_name("var")
+            .unwrap_or_else(|| panic!("No 'var' column found in parquet file {path:?}"));
+
+        // the values are read as
+        // * StructArray<metadata: Binary, value: Binary>
+        // but VariantArray needs them as
+        // * StructArray<metadata: BinaryView, value: BinaryView>
+        //
+        // So cast them to get the right type. Hack Alert: the parquet reader
+        // should read them directly as BinaryView
+        let var = cast_to_binary_view_arrays(var);
+
+        VariantArray::try_new(var).unwrap_or_else(|e| {
+            panic!("Error converting StructArray to VariantArray for {path:?}: {e}")
+        })
     }
 }
 
-#[test]
-#[ignore] // Enable to test error handling cases
-fn test_variant_integration_error_cases_only() {
-    let harness = VariantIntegrationHarness::new().expect("Failed to create test harness");
-
-    let error_cases: Vec<_> = harness
-        .test_cases
-        .iter()
-        .filter(|case| case.test_category == TestCategory::ErrorHandling)
-        .collect();
-
-    println!("Testing {} error cases", error_cases.len());
-
-    for test_case in error_cases {
-        println!(
-            "Testing error case-{:03}: {}",
-            test_case.case_number,
-            test_case.test.as_deref().unwrap_or("unknown")
-        );
-        println!(
-            "  Expected error: {}",
-            test_case.error_message.as_deref().unwrap_or("none")
-        );
-        match harness.run_single_test(test_case) {
-            Ok(()) => println!("  Error case handled gracefully"),
-            Err(e) => println!("  Error case processing failed: {}", e),
-        }
-    }
+fn cast_to_binary_view_arrays(array: &ArrayRef) -> ArrayRef {
+    let new_type = map_type(array.data_type());
+    cast(array, &new_type).unwrap_or_else(|e| {
+        panic!(
+            "Error casting array from {:?} to {:?}: {e}",
+            array.data_type(),
+            new_type
+        )
+    })
 }
 
-// Test that actually reads and validates parquet file structure
-#[test]
-fn test_variant_structure_validation() {
-    // This test attempts to read actual parquet files and validate their structure
-    println!("Testing parquet file structure validation");
-
-    match VariantIntegrationHarness::new() {
-        Ok(harness) => {
-            println!(
-                "Successfully loaded test harness with {} test cases",
-                harness.test_cases.len()
-            );
-
-            // Test structural validation on a few test cases
-            let test_cases_to_validate = [1, 2, 4, 5];
-            let mut validated_cases = 0;
-
-            for case_number in test_cases_to_validate {
-                if let Some(test_case) = harness
-                    .test_cases
-                    .iter()
-                    .find(|c| c.case_number == case_number)
-                {
-                    println!(
-                        "\nValidating case-{:03}: {}",
-                        case_number, test_case.parquet_file
-                    );
-
-                    match harness.run_single_test(test_case) {
-                        Ok(()) => {
-                            println!("  Structure validation PASSED for case-{:03}", case_number);
-                            validated_cases += 1;
-                        }
-                        Err(e) => {
-                            println!(
-                                "  Structure validation FAILED for case-{:03}: {}",
-                                case_number, e
-                            );
-                            // Don't fail the test for structural issues during development
-                        }
-                    }
-                }
-            }
-
-            println!(
-                "\nValidated {} test case structures successfully",
-                validated_cases
-            );
-        }
-        Err(e) => {
-            println!("Could not find shredded_variant test data: {}", e);
-            println!("This is expected if parquet-testing submodule is not at PR #90 branch");
-        }
-    }
-}
-
-// Comprehensive test that shows test coverage and categorization
-#[test]
-fn test_variant_integration_comprehensive_analysis() {
-    // This test analyzes the comprehensive shredded_variant test data from PR #90
-    println!("Running comprehensive analysis of variant integration test data");
-
-    match VariantIntegrationHarness::new() {
-        Ok(harness) => {
-            println!(
-                "Successfully loaded test harness with {} test cases",
-                harness.test_cases.len()
-            );
-
-            // Analyze test breakdown by category
-            let mut category_counts = std::collections::HashMap::new();
-            let mut error_cases = Vec::new();
-            let mut success_cases = Vec::new();
-
-            for test_case in &harness.test_cases {
-                *category_counts
-                    .entry(test_case.test_category.clone())
-                    .or_insert(0) += 1;
-
-                if test_case.error_message.is_some() {
-                    error_cases.push(test_case);
-                } else {
-                    success_cases.push(test_case);
-                }
-            }
-
-            println!("\nTest Coverage Analysis:");
-            println!(
-                "  Primitives: {}",
-                category_counts.get(&TestCategory::Primitives).unwrap_or(&0)
-            );
-            println!(
-                "  Arrays: {}",
-                category_counts.get(&TestCategory::Arrays).unwrap_or(&0)
-            );
-            println!(
-                "  Objects: {}",
-                category_counts.get(&TestCategory::Objects).unwrap_or(&0)
-            );
-            println!(
-                "  Error Handling: {}",
-                category_counts
-                    .get(&TestCategory::ErrorHandling)
-                    .unwrap_or(&0)
-            );
-            println!(
-                "  Schema Validation: {}",
-                category_counts
-                    .get(&TestCategory::SchemaValidation)
-                    .unwrap_or(&0)
-            );
-            println!(
-                "  Complex: {}",
-                category_counts.get(&TestCategory::Complex).unwrap_or(&0)
-            );
-            println!("  Total Success Cases: {}", success_cases.len());
-            println!("  Total Error Cases: {}", error_cases.len());
-
-            // Test a representative sample from each category
-            let test_cases_to_check = [1, 2, 4, 5, 6];
-            let mut validated_cases = 0;
-
-            println!("\nValidating representative test cases:");
-            for case_number in test_cases_to_check {
-                if let Some(test_case) = harness
-                    .test_cases
-                    .iter()
-                    .find(|c| c.case_number == case_number)
-                {
-                    println!(
-                        "Case-{:03} ({:?}): {} -> {}",
-                        case_number,
-                        test_case.test_category,
-                        test_case.parquet_file,
-                        test_case
-                            .variant_file
-                            .as_deref()
-                            .unwrap_or("no variant file")
-                    );
-
-                    // Verify files exist
-                    let parquet_path = harness.test_data_dir.join(&test_case.parquet_file);
-                    assert!(
-                        parquet_path.exists(),
-                        "Parquet file should exist: {}",
-                        parquet_path.display()
-                    );
-
-                    if let Some(variant_file) = &test_case.variant_file {
-                        let variant_path = harness.test_data_dir.join(variant_file);
-                        assert!(
-                            variant_path.exists(),
-                            "Variant file should exist: {}",
-                            variant_path.display()
-                        );
-
-                        if let Ok(variant_data) = fs::read(&variant_path) {
-                            println!("  Variant data: {} bytes", variant_data.len());
-                        }
-                    }
-
-                    validated_cases += 1;
-                }
-            }
-
-            println!("\nError test cases found:");
-            for error_case in error_cases.iter().take(3) {
-                println!(
-                    "  Case-{:03}: {} - {}",
-                    error_case.case_number,
-                    error_case.test.as_deref().unwrap_or("unknown"),
-                    error_case
-                        .error_message
-                        .as_deref()
-                        .unwrap_or("no error message")
-                );
-            }
-
-            assert!(
-                validated_cases >= 3,
-                "Should validate at least 3 test cases"
-            );
-            assert!(
-                !harness.test_cases.is_empty(),
-                "Should have loaded test cases"
-            );
-            println!("\nComprehensive analysis completed successfully!");
-        }
-        Err(e) => {
-            println!("Could not find shredded_variant test data: {}", e);
-            println!("This is expected if parquet-testing submodule is not at PR #90 branch");
-
-            // Don't fail the test if data isn't available, just report it
-            // This allows the test to work in different environments
+/// replaces all instances of Binary with BinaryView in a DataType
+fn map_type(data_type: &DataType) -> DataType {
+    match data_type {
+        DataType::Binary => DataType::BinaryView,
+        DataType::List(field) => {
+            let new_field = field
+                .as_ref()
+                .clone()
+                .with_data_type(map_type(field.data_type()));
+            DataType::List(Arc::new(new_field))
         }
-    }
-}
-
-// Test to verify error case handling works
-#[test]
-fn test_variant_integration_error_case_handling() {
-    // This test demonstrates that error cases are properly detected and handled
-    println!("Testing error case handling with actual error files");
-
-    match VariantIntegrationHarness::new() {
-        Ok(harness) => {
-            println!(
-                "Successfully loaded test harness with {} test cases",
-                harness.test_cases.len()
-            );
-
-            // Find and test a few error cases
-            let error_cases: Vec<_> = harness
-                .test_cases
+        DataType::Struct(fields) => {
+            let new_fields: Fields = fields
                 .iter()
-                .filter(|case| case.test_category == TestCategory::ErrorHandling)
-                .take(3)
+                .map(|f| {
+                    let new_field = f.as_ref().clone().with_data_type(map_type(f.data_type()));
+                    Arc::new(new_field)
+                })
                 .collect();
-
-            println!("Found {} error cases for testing", error_cases.len());
-
-            for error_case in &error_cases {
-                println!(
-                    "\nTesting error case-{:03}: {}",
-                    error_case.case_number,
-                    error_case.test.as_deref().unwrap_or("unknown")
-                );
-                println!(
-                    "  Expected error: {}",
-                    error_case
-                        .error_message
-                        .as_deref()
-                        .unwrap_or("no error message")
-                );
-
-                // Verify the parquet file exists (error cases should still have readable files)
-                let parquet_path = harness.test_data_dir.join(&error_case.parquet_file);
-                assert!(
-                    parquet_path.exists(),
-                    "Error case parquet file should exist: {}",
-                    parquet_path.display()
-                );
-
-                // Run the error case test (should handle gracefully)
-                match harness.run_single_test(error_case) {
-                    Ok(()) => println!("  Error case handled gracefully"),
-                    Err(e) => println!("  Error case processing issue: {}", e),
-                }
-            }
-
-            assert!(!error_cases.is_empty(), "Should have found error cases");
-            println!("\nError case handling test completed successfully!");
-        }
-        Err(e) => {
-            println!("Could not find shredded_variant test data: {}", e);
-            println!("This is expected if parquet-testing submodule is not at PR #90 branch");
-        }
-    }
-}
-
-// Working test that demonstrates the harness functionality
-#[test]
-fn test_variant_integration_with_shredded_variant_data() {
-    // This test uses the comprehensive shredded_variant test data from PR #90
-    println!("Running basic integration test with shredded variant test data");
-
-    match VariantIntegrationHarness::new() {
-        Ok(harness) => {
-            println!(
-                "Successfully loaded test harness with {} test cases",
-                harness.test_cases.len()
-            );
-
-            // Test a few basic cases to verify the framework works
-            let test_cases_to_check = [1, 2, 4, 5, 6];
-            let mut found_cases = 0;
-
-            for case_number in test_cases_to_check {
-                if let Some(test_case) = harness
-                    .test_cases
-                    .iter()
-                    .find(|c| c.case_number == case_number)
-                {
-                    println!(
-                        "Found case-{:03}: {} -> {}",
-                        case_number,
-                        test_case.parquet_file,
-                        test_case
-                            .variant_file
-                            .as_deref()
-                            .unwrap_or("no variant file")
-                    );
-                    found_cases += 1;
-
-                    // Verify files exist
-                    let parquet_path = harness.test_data_dir.join(&test_case.parquet_file);
-                    assert!(
-                        parquet_path.exists(),
-                        "Parquet file should exist: {}",
-                        parquet_path.display()
-                    );
-
-                    if let Some(variant_file) = &test_case.variant_file {
-                        let variant_path = harness.test_data_dir.join(variant_file);
-                        assert!(
-                            variant_path.exists(),
-                            "Variant file should exist: {}",
-                            variant_path.display()
-                        );
-
-                        if let Ok(variant_data) = fs::read(&variant_path) {
-                            println!("  Variant data: {} bytes", variant_data.len());
-                        }
-                    }
-                }
-            }
-
-            assert!(found_cases >= 3, "Should find at least 3 test cases");
-            println!("Successfully validated {} test cases", found_cases);
-        }
-        Err(e) => {
-            println!("Could not find shredded_variant test data: {}", e);
-            println!("This is expected if parquet-testing submodule is not at PR #90 branch");
-
-            // Don't fail the test if data isn't available, just report it
-            // This allows the test to work in different environments
+            DataType::Struct(new_fields)
         }
+        _ => data_type.clone(),
     }
 }
 
-// Fallback test using existing variant test data if shredded_variant is not available
-#[test]
-fn test_variant_integration_with_existing_data() {
-    // This test uses the existing variant test data in parquet-testing/variant/
-    // as a fallback until the shredded_variant data from PR #90 is available
-
-    println!("Running fallback test with existing variant test data");
-
-    // Try to find existing variant test data
-    let variant_dir = find_existing_variant_test_data();
-
-    match variant_dir {
-        Ok(dir) => {
-            println!("Found existing variant test data at: {}", dir.display());
-
-            // List available test files
-            if let Ok(entries) = fs::read_dir(&dir) {
-                let mut metadata_files = Vec::new();
-                for entry in entries.flatten() {
-                    if let Some(name) = entry.file_name().to_str() {
-                        if name.ends_with(".metadata") {
-                            metadata_files.push(name.to_string());
-                        }
-                    }
-                }
-
-                println!("Found {} metadata files for testing", metadata_files.len());
-                assert!(
-                    !metadata_files.is_empty(),
-                    "Should find at least some metadata files"
-                );
-
-                // Test loading a few basic cases
-                for metadata_file in metadata_files.iter().take(3) {
-                    let case_name = metadata_file.strip_suffix(".metadata").unwrap();
-                    match test_load_existing_variant_case(&dir, case_name) {
-                        Ok(()) => println!("Successfully loaded variant case: {}", case_name),
-                        Err(e) => println!("Failed to load variant case {}: {}", case_name, e),
-                    }
-                }
-            }
-        }
-        Err(e) => {
-            println!("Could not find variant test data: {}", e);
-            println!("This is expected if parquet-testing submodule is not initialized");
-        }
-    }
+/// Variant value loaded from .variant.bin file
+#[derive(Debug, Clone)]
+struct ExpectedVariant {
+    data: Vec<u8>,
+    data_offset: usize,
 }
 
-/// Find existing variant test data directory
-fn find_existing_variant_test_data() -> Result<PathBuf, Box<dyn Error>> {
-    if let Ok(dir) = env::var("PARQUET_TEST_DATA") {
-        let variant_dir = PathBuf::from(dir).join("../variant");
-        if variant_dir.is_dir() {
-            return Ok(variant_dir);
-        }
+impl ExpectedVariant {
+    fn try_load(path: &Path) -> Result<Self> {
+        // "Each `*.variant.bin` file contains a single variant serialized
+        // by concatenating the serialized bytes of the variant metadata
+        // followed by the serialized bytes of the variant value."
+        let data = fs::read(path).map_err(|e| format!("cannot read variant file {path:?}: {e}"))?;
+        let metadata = VariantMetadata::try_new(&data)
+            .map_err(|e| format!("cannot parse variant metadata from {path:?}: {e}"))?;
+
+        let data_offset = metadata.size();
+        Ok(Self { data, data_offset })
     }
 
-    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string());
-    let candidates = vec![
-        PathBuf::from(&manifest_dir).join("../parquet-testing/variant"),
-        PathBuf::from(&manifest_dir).join("parquet-testing/variant"),
-    ];
-
-    for candidate in candidates {
-        if candidate.is_dir() {
-            return Ok(candidate);
-        }
+    fn as_variant(&self) -> Variant<'_, '_> {
+        let metadata = &self.data[0..self.data_offset];
+        let value = &self.data[self.data_offset..];
+        Variant::try_new(metadata, value).expect("Invalid variant data")
     }
-
-    Err("Could not find existing variant test data directory".into())
 }
 
-/// Test loading a single variant case from existing test data
-fn test_load_existing_variant_case(
-    variant_dir: &Path,
-    case_name: &str,
-) -> Result<(), Box<dyn Error>> {
-    let metadata_path = variant_dir.join(format!("{}.metadata", case_name));
-    let value_path = variant_dir.join(format!("{}.value", case_name));
+static TEST_CASE_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
+    PathBuf::from(parquet_test_data())
+        .join("..")
+        .join("shredded_variant")
+});
 
-    if !metadata_path.exists() || !value_path.exists() {
-        return Err(format!("Missing files for case: {}", case_name).into());
+/// All tests
+static ALL_CASES: LazyLock<Result<Vec<VariantTestCase>>> = LazyLock::new(|| {
+    let cases_file = TEST_CASE_DIR.join("cases.json");
+
+    if !cases_file.exists() {
+        return Err(format!("cases.json not found at {}", cases_file.display()));
     }
 
-    let _metadata = fs::read(&metadata_path)?;
-    let _value = fs::read(&value_path)?;
+    let content = fs::read_to_string(&cases_file)
+        .map_err(|e| format!("cannot read cases file {cases_file:?}: {e}"))?;
 
-    // TODO: Parse variant when parquet_variant crate is available
-    // let _variant = Variant::try_new(&metadata, &value)?;
+    serde_json::from_str::<Vec<VariantTestCase>>(content.as_str())
+        .map_err(|e| format!("cannot parse json from {cases_file:?}: {e}"))
+});
 
-    Ok(())
+// return a reference to the static ALL_CASES, or panic if loading failed
+fn all_cases() -> &'static [VariantTestCase] {
+    ALL_CASES.as_ref().unwrap()
 }

From 136c2d857924e7d76c570095bcef98fef73f908c Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 12 Sep 2025 06:50:34 -0400
Subject: [PATCH 3/8] fix docs

---
 parquet-variant/src/variant/metadata.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs
index 14d048631de7..7944e45a4df5 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -310,7 +310,11 @@ impl<'m> VariantMetadata<'m> {
         self.header.offset_size.unpack_u32(bytes, i)
     }
 
-    /// Returns the last byte offset of the metadata dictionary, which is also the total size of
+    /// Returns the total size, in bytes, of the metadata.
+    ///
+    /// Note this value may be smaller than what was passed to [`Self::new`] or
+    /// [`Self::try_new`] if the input was larger than necessary to encode the
+    /// metadata dictionary.
     pub fn size(&self) -> usize {
         self.bytes.len()
     }

From 675f88b3a11605a98aba284d9555d75bb06ab715 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 12 Sep 2025 06:51:54 -0400
Subject: [PATCH 4/8] clarify comments

---
 parquet/tests/variant_integration.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index fe863462a660..0c48eb7839b1 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -63,8 +63,10 @@ macro_rules! variant_test_case {
 }
 
 // Generate test functions for each case
-// Note that case 3 is empty in cases.json, so we skip it here
-// Only cases 40, 42, 87, 127 and 128 are expected to fail, the rest should pass
+// Notes
+// - case 3 is empty in cases.json for some reason
+// - cases 40, 42, 87, 127 and 128 are expected to fail always (they include invalid variants)
+// - the remaining cases are expected to (eventually) pass
 
 variant_test_case!(1, "Unsupported typed_value type: List(");
 variant_test_case!(2, "Unsupported typed_value type: List(");

From 60d9425c0ecd427b836170514ce0adb60052f12a Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 12 Sep 2025 07:17:56 -0400
Subject: [PATCH 5/8] Add issue links

---
 parquet/tests/variant_integration.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index 0c48eb7839b1..653b1173d969 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -77,6 +77,7 @@ variant_test_case!(2, "Unsupported typed_value type: List(");
 // },
 // ```
 variant_test_case!(3, "parquet_file must be set");
+// https://github.com/apache/arrow-rs/issues/8329
 variant_test_case!(4, "Unsupported typed_value type: Boolean");
 variant_test_case!(5, "Unsupported typed_value type: Boolean");
 variant_test_case!(6);
@@ -91,8 +92,10 @@ variant_test_case!(14);
 variant_test_case!(15);
 variant_test_case!(16);
 variant_test_case!(17);
+// https://github.com/apache/arrow-rs/issues/8330
 variant_test_case!(18, "Unsupported typed_value type: Date32");
 variant_test_case!(19, "Unsupported typed_value type: Date32");
+// https://github.com/apache/arrow-rs/issues/8331
 variant_test_case!(
     20,
     "Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
@@ -109,15 +112,19 @@ variant_test_case!(
     23,
     "Unsupported typed_value type: Timestamp(Microsecond, None)"
 );
+// https://github.com/apache/arrow-rs/issues/8332
 variant_test_case!(24, "Unsupported typed_value type: Decimal128(9, 4)");
 variant_test_case!(25, "Unsupported typed_value type: Decimal128(9, 4)");
 variant_test_case!(26, "Unsupported typed_value type: Decimal128(18, 9)");
 variant_test_case!(27, "Unsupported typed_value type: Decimal128(18, 9)");
 variant_test_case!(28, "Unsupported typed_value type: Decimal128(38, 9)");
 variant_test_case!(29, "Unsupported typed_value type: Decimal128(38, 9)");
+// https://github.com/apache/arrow-rs/issues/8333
 variant_test_case!(30, "Unsupported typed_value type: BinaryView");
 variant_test_case!(31, "Unsupported typed_value type: Utf8");
+// https://github.com/apache/arrow-rs/issues/8334
 variant_test_case!(32, "Unsupported typed_value type: Time64(Microsecond)");
+// https://github.com/apache/arrow-rs/issues/8331
 variant_test_case!(
     33,
     "Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
@@ -134,7 +141,9 @@ variant_test_case!(
     36,
     "Unsupported typed_value type: Timestamp(Nanosecond, None)"
 );
+// https://github.com/apache/arrow-rs/issues/8335
 variant_test_case!(37, "Unsupported typed_value type: FixedSizeBinary(16)");
+// https://github.com/apache/arrow-rs/issues/8336
 variant_test_case!(38, "Unsupported typed_value type: Struct(");
 variant_test_case!(39);
 // Is an error case (should be failing as the expected error message indicates)
@@ -145,8 +154,10 @@ variant_test_case!(
     42,
     "Expected an error 'Invalid variant, conflicting value and typed_value`, but got no error"
 );
+// https://github.com/apache/arrow-rs/issues/8336
 variant_test_case!(43, "Unsupported typed_value type: Struct([Field");
 variant_test_case!(44, "Unsupported typed_value type: Struct([Field");
+// https://github.com/apache/arrow-rs/issues/8337
 variant_test_case!(45, "Unsupported typed_value type: List(Field");
 variant_test_case!(46, "Unsupported typed_value type: Struct([Field");
 variant_test_case!(47);
@@ -185,8 +196,10 @@ variant_test_case!(79);
 variant_test_case!(80);
 variant_test_case!(81);
 variant_test_case!(82);
+// https://github.com/apache/arrow-rs/issues/8336
 variant_test_case!(83, "Unsupported typed_value type: Struct([Field");
 variant_test_case!(84, "Unsupported typed_value type: Struct([Field");
+// https://github.com/apache/arrow-rs/issues/8337
 variant_test_case!(85, "Unsupported typed_value type: List(Field");
 variant_test_case!(86, "Unsupported typed_value type: List(Field");
 // Is an error case (should be failing as the expected error message indicates)

From 94a9f2dc3287c4aa6ced99468dadfc3e7346b389 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Mon, 15 Sep 2025 16:24:36 -0400
Subject: [PATCH 6/8] Update tests

---
 parquet/tests/variant_integration.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index 653b1173d969..7c8dffc11d9c 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -78,8 +78,8 @@ variant_test_case!(2, "Unsupported typed_value type: List(");
 // ```
 variant_test_case!(3, "parquet_file must be set");
 // https://github.com/apache/arrow-rs/issues/8329
-variant_test_case!(4, "Unsupported typed_value type: Boolean");
-variant_test_case!(5, "Unsupported typed_value type: Boolean");
+variant_test_case!(4);
+variant_test_case!(5);
 variant_test_case!(6);
 variant_test_case!(7);
 variant_test_case!(8);

From 2f4aacf0c7d4bbbb39228117af885a61a59beebd Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 16 Sep 2025 14:38:30 -0400
Subject: [PATCH 7/8] Return Variant::Uuid for FixedSizeBianry of 16 when
 possible

---
 parquet-variant-compute/src/variant_array.rs | 13 ++++++++++---
 parquet-variant/src/variant.rs               |  7 +++++--
 parquet/tests/variant_integration.rs         |  3 +--
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs
index c3f3ad54131c..9ecc631da627 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -17,6 +17,7 @@
 
 //! [`VariantArray`] implementation
 
+use crate::type_conversion::primitive_conversion_single_value;
 use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray};
 use arrow::buffer::NullBuffer;
 use arrow::datatypes::{
@@ -24,12 +25,11 @@ use arrow::datatypes::{
     UInt32Type, UInt64Type, UInt8Type,
 };
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
+use parquet_variant::Uuid;
 use parquet_variant::Variant;
 use std::any::Any;
 use std::sync::Arc;
 
-use crate::type_conversion::primitive_conversion_single_value;
-
 /// An array of Parquet [`Variant`] values
 ///
 /// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying
@@ -556,8 +556,15 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
             let value = boolean_array.value(index);
             Variant::from(value)
         }
-        DataType::FixedSizeBinary(_) => {
+        DataType::FixedSizeBinary(binary_len) => {
             let array = typed_value.as_fixed_size_binary();
+            // Try to treat 16 byte FixedSizeBinary as UUID
+            let value = array.value(index);
+            if *binary_len == 16 {
+                if let Ok(uuid) = Uuid::from_slice(&value) {
+                    return Variant::from(uuid);
+                }
+            }
             let value = array.value(index);
             Variant::from(value)
         }
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index cc4c3bcadd66..faaab94bc3fd 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -19,6 +19,11 @@ pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
 pub use self::list::VariantList;
 pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES};
 pub use self::object::VariantObject;
+
+// Publically export types used in the API
+pub use half::f16;
+pub use uuid::Uuid;
+
 use crate::decoder::{
     self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
 };
@@ -28,8 +33,6 @@ use std::ops::Deref;
 
 use arrow_schema::ArrowError;
 use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
-use half::f16;
-use uuid::Uuid;
 
 mod decimal;
 mod list;
diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index 7c8dffc11d9c..6a586e013ef5 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -141,8 +141,7 @@ variant_test_case!(
     36,
     "Unsupported typed_value type: Timestamp(Nanosecond, None)"
 );
-// https://github.com/apache/arrow-rs/issues/8335
-variant_test_case!(37, "Unsupported typed_value type: FixedSizeBinary(16)");
+variant_test_case!(37);
 // https://github.com/apache/arrow-rs/issues/8336
 variant_test_case!(38, "Unsupported typed_value type: Struct(");
 variant_test_case!(39);

From cdb8943f0846068ac684306085bccf090493b688 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 16 Sep 2025 14:38:51 -0400
Subject: [PATCH 8/8] clippy

---
 parquet-variant-compute/src/variant_array.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs
index 9ecc631da627..e87d03f88c5b 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -561,7 +561,7 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
             // Try to treat 16 byte FixedSizeBinary as UUID
             let value = array.value(index);
             if *binary_len == 16 {
-                if let Ok(uuid) = Uuid::from_slice(&value) {
+                if let Ok(uuid) = Uuid::from_slice(value) {
                     return Variant::from(uuid);
                 }
             }