Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
e3a0b50
custom PageLocation decoder for speed
etseidl Aug 20, 2025
71d3859
fix recently added test
etseidl Aug 20, 2025
ff42e5a
clippy
etseidl Aug 20, 2025
1f2c216
experimental new form for column index
etseidl Aug 20, 2025
37f3b20
fix for test added in main
etseidl Aug 21, 2025
3d4e28e
refactor new column index
etseidl Aug 21, 2025
2b85b89
checkpoint...everything but stats converter
etseidl Aug 21, 2025
5ee1b8f
fix bug found in testing
etseidl Aug 21, 2025
624b88b
Merge branch 'new_col_idx' into new_col_idx_full
etseidl Aug 21, 2025
d99a06a
stats converter works
etseidl Aug 22, 2025
79a6917
get rid of import
etseidl Aug 22, 2025
878d460
get parquet-index working
etseidl Aug 22, 2025
009632a
doc fixes
etseidl Aug 22, 2025
998ac6c
Merge branch 'offset_idx_speedup' into new_col_idx_full
etseidl Aug 22, 2025
a822dfd
move column index to its own module
etseidl Aug 22, 2025
20df075
add ColumnIndexIterators trait, simplify stats converter a little
etseidl Aug 22, 2025
7755b7b
restore comment
etseidl Aug 22, 2025
66ed8bc
Merge branch 'new_col_idx' into new_col_idx_full
etseidl Aug 22, 2025
f6c5738
further rework...allow for fallback to slow decoder
etseidl Aug 24, 2025
3733b86
Merge branch 'offset_idx_speedup' into new_col_idx_full
etseidl Aug 24, 2025
09d71e1
refactor a bit
etseidl Aug 24, 2025
1ddaa35
simplify reading of int array
etseidl Aug 24, 2025
006d59d
Merge branch 'offset_idx_speedup' into new_col_idx_full
etseidl Aug 24, 2025
c271085
get write working for enum and some unions
etseidl Aug 25, 2025
34cdaf2
make test_roundtrip visible
etseidl Aug 25, 2025
c9be570
add test for converted_type, start on logical_type
etseidl Aug 25, 2025
a9cd09d
checkpoint struct field writing
etseidl Aug 25, 2025
ae65167
get some struct examples and lists working
etseidl Aug 25, 2025
272a013
get rid of copied allow
etseidl Aug 25, 2025
632e171
get writer macros for structs working
etseidl Aug 26, 2025
9f01b60
fix bug in struct macro
etseidl Aug 26, 2025
2511f8f
make Repetition public
etseidl Aug 26, 2025
61e9e07
get union working for writes
etseidl Aug 26, 2025
e39f119
add some tests
etseidl Aug 26, 2025
def3d07
redo OrderedF64 initialization
etseidl Aug 26, 2025
386f222
unused import
etseidl Aug 26, 2025
7ae2304
Merge branch 'gh5854_thrift_remodel' into write_thrift
etseidl Aug 26, 2025
6beb79d
get decryption working
etseidl Aug 26, 2025
1eaa17b
refactor and clippy fixes
etseidl Aug 26, 2025
713e38a
add page header defs
etseidl Aug 26, 2025
1ebfdf2
Merge branch 'gh5854_thrift_remodel' into write_thrift
etseidl Aug 27, 2025
366326a
Merge branch 'write_thrift' into read_and_crypto
etseidl Aug 27, 2025
d8081a9
fix clippy issues
etseidl Aug 28, 2025
5d6c8b1
allow unused page header structs
etseidl Aug 28, 2025
709e813
remove Write from WriteThrift
etseidl Aug 29, 2025
def1d68
Merge branch 'write_thrift' into read_and_crypto
etseidl Aug 29, 2025
0579456
finish merge
etseidl Aug 29, 2025
689297c
Merge branch 'gh5854_thrift_remodel' into write_thrift
etseidl Aug 30, 2025
7d47857
Merge branch 'write_thrift' into read_and_crypto
etseidl Aug 30, 2025
138b0d5
Merge branch 'gh5854_thrift_remodel' into write_thrift
etseidl Sep 5, 2025
5b6c177
Merge branch 'write_thrift' into read_and_crypto
etseidl Sep 5, 2025
c729d22
Merge remote-tracking branch 'origin/gh5854_thrift_remodel' into writ…
etseidl Sep 8, 2025
96419c4
Merge branch 'write_thrift' into read_and_crypto
etseidl Sep 8, 2025
f81a732
get a start on some documentation and add some TODOs
etseidl Sep 10, 2025
be58ea6
Merge branch 'write_thrift' into read_and_crypto
etseidl Sep 10, 2025
7268dd3
fix docs
etseidl Sep 10, 2025
8305915
Merge branch 'write_thrift' into read_and_crypto
etseidl Sep 10, 2025
f0beb0b
Merge branch 'gh5854_thrift_remodel' into read_and_crypto
etseidl Sep 10, 2025
cfa6740
backport fix for tests without encryption
etseidl Sep 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 1 addition & 93 deletions parquet/src/file/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,7 @@ pub(crate) mod thrift_gen;
mod writer;

#[cfg(feature = "encryption")]
use crate::encryption::{
decrypt::FileDecryptor,
modules::{create_module_aad, ModuleType},
};
use crate::encryption::decrypt::FileDecryptor;
#[cfg(feature = "encryption")]
use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData};
pub(crate) use crate::file::metadata::memory::HeapSize;
Expand All @@ -117,8 +114,6 @@ use crate::schema::types::{
ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor,
Type as SchemaType,
};
#[cfg(feature = "encryption")]
use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
use crate::{
basic::BoundaryOrder,
errors::{ParquetError, Result},
Expand Down Expand Up @@ -684,93 +679,6 @@ impl RowGroupMetaData {
self.file_offset
}

/// Builds a [`RowGroupMetaData`] from a Thrift `RowGroup`, decrypting any
/// encrypted column metadata along the way.
///
/// For each column chunk that carries `encrypted_column_metadata`, and only
/// when a `decryptor` is supplied, the encrypted bytes are decrypted and
/// parsed as a Thrift `ColumnMetaData`, which then replaces the chunk's
/// `meta_data` before conversion. Columns without encrypted metadata (or
/// all columns when `decryptor` is `None`) are converted as-is.
///
/// # Errors
///
/// Returns an error if:
/// * the schema and the row group disagree on the number of columns,
/// * a column has encrypted metadata but no `crypto_metadata`,
/// * the row group has no (or a negative) ordinal while a column needs
///   decrypting (the ordinal is an input to the module AAD),
/// * the decryptor lookup, AAD construction, decryption, or Thrift decoding
///   fails.
#[cfg(feature = "encryption")]
fn from_encrypted_thrift(
    schema_descr: SchemaDescPtr,
    mut rg: crate::format::RowGroup,
    decryptor: Option<&FileDecryptor>,
) -> Result<RowGroupMetaData> {
    if schema_descr.num_columns() != rg.columns.len() {
        return Err(general_err!(
            "Column count mismatch. Schema has {} columns while Row Group has {}",
            schema_descr.num_columns(),
            rg.columns.len()
        ));
    }
    let total_byte_size = rg.total_byte_size;
    let num_rows = rg.num_rows;
    let mut columns = Vec::with_capacity(rg.columns.len());

    for (i, (mut c, d)) in rg
        .columns
        .drain(..)
        .zip(schema_descr.columns())
        .enumerate()
    {
        // Read encrypted metadata if it's present and we have a decryptor.
        // Borrow the encrypted bytes instead of cloning the whole buffer.
        if let (Some(buf), Some(decryptor)) =
            (c.encrypted_column_metadata.as_deref(), decryptor)
        {
            let column_decryptor = match c.crypto_metadata.as_ref() {
                None => {
                    return Err(general_err!(
                        "No crypto_metadata is set for column '{}', which has encrypted metadata",
                        d.path().string()
                    ));
                }
                Some(TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) => {
                    let column_name = crypto_metadata.path_in_schema.join(".");
                    decryptor.get_column_metadata_decryptor(
                        column_name.as_str(),
                        crypto_metadata.key_metadata.as_deref(),
                    )?
                }
                Some(TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {
                    decryptor.get_footer_decryptor()?
                }
            };

            // The ordinal comes from the file being read, so a missing or
            // negative value must surface as an error rather than a panic
            // or a silently wrapped index.
            let rg_ordinal = rg.ordinal.ok_or_else(|| {
                general_err!(
                    "Row group ordinal is required to decrypt metadata of column '{}'",
                    d.path().string()
                )
            })?;
            let rg_ordinal = usize::try_from(rg_ordinal).map_err(|_| {
                general_err!(
                    "Invalid row group ordinal {} while decrypting metadata of column '{}'",
                    rg_ordinal,
                    d.path().string()
                )
            })?;

            let column_aad = create_module_aad(
                decryptor.file_aad(),
                ModuleType::ColumnMetaData,
                rg_ordinal,
                i,
                None,
            )?;

            let decrypted_cc_buf = column_decryptor
                .decrypt(buf, column_aad.as_ref())
                .map_err(|_| {
                    general_err!(
                        "Unable to decrypt column '{}', perhaps the column key is wrong?",
                        d.path().string()
                    )
                })?;

            // Replace the chunk's metadata with the decrypted, decoded form.
            let mut prot = TCompactSliceInputProtocol::new(decrypted_cc_buf.as_slice());
            c.meta_data = Some(crate::format::ColumnMetaData::read_from_in_protocol(
                &mut prot,
            )?);
        }
        columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?);
    }

    let sorting_columns = rg.sorting_columns.map(|scs| {
        scs.iter()
            .map(|sc| sc.into())
            .collect::<Vec<SortingColumn>>()
    });
    Ok(RowGroupMetaData {
        columns,
        num_rows,
        sorting_columns,
        total_byte_size,
        schema_descr,
        file_offset: rg.file_offset,
        ordinal: rg.ordinal,
    })
}

/// Method to convert from Thrift.
pub fn from_thrift(
schema_descr: SchemaDescPtr,
Expand Down
Loading
Loading