Skip to content

Commit 53f785c

Browse files
authored
Merge branch 'main' into ctty/sort-partition
2 parents 0c67c43 + 979cc07 commit 53f785c

File tree

34 files changed

+1896
-965
lines changed

34 files changed

+1896
-965
lines changed

Cargo.lock

Lines changed: 976 additions & 875 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ async-std = "1.12"
5555
async-trait = "0.1.88"
5656
aws-config = "1.6.1"
5757
aws-sdk-glue = "1.39"
58+
aws-sdk-s3tables = "1.28.0"
5859
backon = "1.5.1"
5960
base64 = "0.22.1"
6061
bimap = "0.6"
@@ -73,10 +74,13 @@ faststr = "0.2.31"
7374
fnv = "1.0.7"
7475
fs-err = "3.1.0"
7576
futures = "0.3"
76-
hive_metastore = "0.1"
77+
hive_metastore = "0.2.0"
7778
http = "1.2"
7879
iceberg = { version = "0.6.0", path = "./crates/iceberg" }
7980
iceberg-catalog-rest = { version = "0.6.0", path = "./crates/catalog/rest" }
81+
iceberg-catalog-glue = { version = "0.6.0", path = "./crates/catalog/glue" }
82+
iceberg-catalog-s3tables = { version = "0.6.0", path = "./crates/catalog/s3tables" }
83+
iceberg-catalog-hms = { version = "0.6.0", path = "./crates/catalog/hms" }
8084
iceberg-datafusion = { version = "0.6.0", path = "./crates/integrations/datafusion" }
8185
indicatif = "0.17"
8286
itertools = "0.13"
@@ -92,7 +96,7 @@ once_cell = "1.20"
9296
opendal = "0.54.0"
9397
ordered-float = "4"
9498
parquet = "55"
95-
pilota = "0.11.2"
99+
pilota = "0.11.10"
96100
port_scanner = "0.1.5"
97101
pretty_assertions = "1.4"
98102
rand = "0.8.5"
@@ -120,5 +124,5 @@ typed-builder = "0.20"
120124
url = "2.5.4"
121125
uuid = { version = "1.16", features = ["v7"] }
122126
volo = "0.10.6"
123-
volo-thrift = "0.10.6"
127+
volo-thrift = "0.10.8"
124128
zstd = "0.13.2"

crates/catalog/glue/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ iceberg = { workspace = true }
3737
serde_json = { workspace = true }
3838
tokio = { workspace = true }
3939
tracing = { workspace = true }
40-
typed-builder = { workspace = true }
4140

4241
[dev-dependencies]
4342
ctor = { workspace = true }

crates/catalog/glue/LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../LICENSE

crates/catalog/glue/NOTICE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../NOTICE

crates/catalog/glue/src/catalog.rs

Lines changed: 83 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,9 @@ use iceberg::io::{
2626
use iceberg::spec::{TableMetadata, TableMetadataBuilder};
2727
use iceberg::table::Table;
2828
use iceberg::{
29-
Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit,
30-
TableCreation, TableIdent,
29+
Catalog, CatalogBuilder, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result,
30+
TableCommit, TableCreation, TableIdent,
3131
};
32-
use typed_builder::TypedBuilder;
3332

3433
use crate::error::{from_aws_build_error, from_aws_sdk_error};
3534
use crate::utils::{
@@ -40,15 +39,90 @@ use crate::{
4039
AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, with_catalog_id,
4140
};
4241

43-
#[derive(Debug, TypedBuilder)]
42+
/// Glue catalog URI
43+
pub const GLUE_CATALOG_PROP_URI: &str = "uri";
44+
/// Glue catalog id
45+
pub const GLUE_CATALOG_PROP_CATALOG_ID: &str = "catalog_id";
46+
/// Glue catalog warehouse location
47+
pub const GLUE_CATALOG_PROP_WAREHOUSE: &str = "warehouse";
48+
49+
/// Builder for [`GlueCatalog`].
50+
#[derive(Debug)]
51+
pub struct GlueCatalogBuilder(GlueCatalogConfig);
52+
53+
impl Default for GlueCatalogBuilder {
54+
fn default() -> Self {
55+
Self(GlueCatalogConfig {
56+
name: None,
57+
uri: None,
58+
catalog_id: None,
59+
warehouse: "".to_string(),
60+
props: HashMap::new(),
61+
})
62+
}
63+
}
64+
65+
impl CatalogBuilder for GlueCatalogBuilder {
66+
type C = GlueCatalog;
67+
68+
fn load(
69+
mut self,
70+
name: impl Into<String>,
71+
props: HashMap<String, String>,
72+
) -> impl Future<Output = Result<Self::C>> + Send {
73+
self.0.name = Some(name.into());
74+
75+
if props.contains_key(GLUE_CATALOG_PROP_URI) {
76+
self.0.uri = props.get(GLUE_CATALOG_PROP_URI).cloned()
77+
}
78+
79+
if props.contains_key(GLUE_CATALOG_PROP_CATALOG_ID) {
80+
self.0.catalog_id = props.get(GLUE_CATALOG_PROP_CATALOG_ID).cloned()
81+
}
82+
83+
if props.contains_key(GLUE_CATALOG_PROP_WAREHOUSE) {
84+
self.0.warehouse = props
85+
.get(GLUE_CATALOG_PROP_WAREHOUSE)
86+
.cloned()
87+
.unwrap_or_default();
88+
}
89+
90+
// Collect other remaining properties
91+
self.0.props = props
92+
.into_iter()
93+
.filter(|(k, _)| {
94+
k != GLUE_CATALOG_PROP_URI
95+
&& k != GLUE_CATALOG_PROP_CATALOG_ID
96+
&& k != GLUE_CATALOG_PROP_WAREHOUSE
97+
})
98+
.collect();
99+
100+
async move {
101+
if self.0.name.is_none() {
102+
return Err(Error::new(
103+
ErrorKind::DataInvalid,
104+
"Catalog name is required",
105+
));
106+
}
107+
if self.0.warehouse.is_empty() {
108+
return Err(Error::new(
109+
ErrorKind::DataInvalid,
110+
"Catalog warehouse is required",
111+
));
112+
}
113+
114+
GlueCatalog::new(self.0).await
115+
}
116+
}
117+
}
118+
119+
/// Glue Catalog configuration
///
/// Populated by `GlueCatalogBuilder` and consumed by `GlueCatalog::new`.
#[derive(Debug)]
pub(crate) struct GlueCatalogConfig {
    // Catalog name; set from the `name` argument of `CatalogBuilder::load`.
    name: Option<String>,
    // Optional Glue endpoint URI, taken from the `uri` property.
    uri: Option<String>,
    // Optional Glue catalog id, taken from the `catalog_id` property.
    catalog_id: Option<String>,
    // Warehouse location; must be non-empty for the catalog to load.
    warehouse: String,
    // All remaining properties that are not one of the dedicated keys above.
    props: HashMap<String, String>,
}
54128

@@ -71,7 +145,7 @@ impl Debug for GlueCatalog {
71145

72146
impl GlueCatalog {
73147
/// Create a new glue catalog
74-
pub async fn new(config: GlueCatalogConfig) -> Result<Self> {
148+
async fn new(config: GlueCatalogConfig) -> Result<Self> {
75149
let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await;
76150
let mut file_io_props = config.props.clone();
77151
if !file_io_props.contains_key(S3_ACCESS_KEY_ID) {

crates/catalog/glue/src/lib.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,30 @@
1616
// under the License.
1717

1818
//! Iceberg Glue Catalog implementation.
19+
//!
20+
//! Use `GlueCatalogBuilder` to build a Glue catalog from configuration properties.
21+
//! # Example
22+
//!
23+
//! ```rust, no_run
24+
//! use std::collections::HashMap;
25+
//!
26+
//! use iceberg::CatalogBuilder;
27+
//! use iceberg_catalog_glue::{GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalogBuilder};
28+
//!
29+
//! #[tokio::main]
30+
//! async fn main() {
31+
//! let catalog = GlueCatalogBuilder::default()
32+
//! .load(
33+
//! "glue",
34+
//! HashMap::from([(
35+
//! GLUE_CATALOG_PROP_WAREHOUSE.to_string(),
36+
//! "s3://warehouse".to_string(),
37+
//! )]),
38+
//! )
39+
//! .await
40+
//! .unwrap();
41+
//! }
42+
//! ```
1943
2044
#![deny(missing_docs)]
2145

crates/catalog/glue/tests/glue_catalog_test.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,12 @@ use std::sync::RwLock;
2424
use ctor::{ctor, dtor};
2525
use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
2626
use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
27-
use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCreation, TableIdent};
27+
use iceberg::{
28+
Catalog, CatalogBuilder, Namespace, NamespaceIdent, Result, TableCreation, TableIdent,
29+
};
2830
use iceberg_catalog_glue::{
29-
AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GlueCatalog, GlueCatalogConfig,
31+
AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GLUE_CATALOG_PROP_URI,
32+
GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder,
3033
};
3134
use iceberg_test_utils::docker::DockerCompose;
3235
use iceberg_test_utils::{normalize_test_name, set_up};
@@ -112,13 +115,22 @@ async fn get_catalog() -> GlueCatalog {
112115
retries += 1;
113116
}
114117

115-
let config = GlueCatalogConfig::builder()
116-
.uri(format!("http://{}", glue_socket_addr))
117-
.warehouse("s3a://warehouse/hive".to_string())
118-
.props(props.clone())
119-
.build();
118+
let mut glue_props = HashMap::from([
119+
(
120+
GLUE_CATALOG_PROP_URI.to_string(),
121+
format!("http://{}", glue_socket_addr),
122+
),
123+
(
124+
GLUE_CATALOG_PROP_WAREHOUSE.to_string(),
125+
"s3a://warehouse/hive".to_string(),
126+
),
127+
]);
128+
glue_props.extend(props.clone());
120129

121-
GlueCatalog::new(config).await.unwrap()
130+
GlueCatalogBuilder::default()
131+
.load("glue", glue_props)
132+
.await
133+
.unwrap()
122134
}
123135

124136
async fn set_test_namespace(catalog: &GlueCatalog, namespace: &NamespaceIdent) -> Result<()> {

crates/catalog/hms/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ pilota = { workspace = true }
3838
serde_json = { workspace = true }
3939
tokio = { workspace = true }
4040
tracing = { workspace = true }
41-
typed-builder = { workspace = true }
4241
volo-thrift = { workspace = true }
4342

4443
# Transitive dependencies below

crates/catalog/hms/LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../LICENSE

0 commit comments

Comments
 (0)