Skip to content

Commit 8996ff6

Browse files
committed
feat: summary loading
1 parent a583239 commit 8996ff6

File tree

15 files changed

+558
-277
lines changed

15 files changed

+558
-277
lines changed

tanic-svc/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ names = "0.14.0"
3333
tokio-stream = { version = "0.1.17", features = ["sync"] }
3434
parquet = "54.0.0"
3535
indexmap = "2.7.1"
36+
futures = "0.3.31"

tanic-svc/src/iceberg_context.rs

Lines changed: 160 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,29 @@
11
//! Iceberg Context
22
3-
use iceberg::{Catalog, NamespaceIdent};
4-
use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
53
use std::sync::Arc;
4+
use std::sync::RwLock;
5+
6+
use futures::stream::StreamExt;
7+
use iceberg::{Catalog, NamespaceIdent, TableIdent};
8+
use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
69
use tokio::sync::mpsc::UnboundedSender;
710
use tokio::sync::watch::Receiver;
8-
use std::sync::RwLock;
911
use tokio::task::JoinHandle;
10-
use tokio_stream::{wrappers::WatchStream, StreamExt};
12+
use tokio_stream::wrappers::WatchStream;
1113

1214
use tanic_core::config::ConnectionDetails;
1315
use tanic_core::message::{NamespaceDeets, TableDeets};
1416
use tanic_core::{Result, TanicError};
17+
use tokio::sync::mpsc::{channel, Receiver as MpscReceiver, Sender as MpscSender};
18+
use tokio_stream::wrappers::ReceiverStream;
1519

1620
use crate::state::{TanicAction, TanicAppState, TanicIcebergState};
1721

1822
type ActionTx = UnboundedSender<TanicAction>;
1923
type IceCtxRef = Arc<RwLock<IcebergContext>>;
2024

25+
const JOB_STREAM_CONCURRENCY: usize = 1;
26+
2127
#[derive(Debug, Default)]
2228
struct IcebergContext {
2329
connection_details: Option<ConnectionDetails>,
@@ -40,6 +46,13 @@ pub struct IcebergContextManager {
4046
state_ref: Arc<RwLock<TanicAppState>>,
4147
}
4248

49+
#[derive(Debug)]
50+
enum IcebergTask {
51+
Namespaces,
52+
TablesForNamespace(NamespaceDeets),
53+
SummaryForTable(TableDeets),
54+
}
55+
4356
impl IcebergContextManager {
4457
pub fn new(action_tx: ActionTx, state_ref: Arc<RwLock<TanicAppState>>) -> Self {
4558
Self {
@@ -52,8 +65,16 @@ impl IcebergContextManager {
5265
pub async fn event_loop(&self, state_rx: Receiver<()>) -> Result<()> {
5366
let mut state_stream = WatchStream::new(state_rx);
5467

55-
while state_stream.next().await.is_some() {
68+
let (job_queue_tx, job_queue_rx) = channel(10);
69+
70+
tokio::spawn({
71+
let action_tx = self.action_tx.clone();
72+
let job_queue_tx = job_queue_tx.clone();
73+
let iceberg_ctx = self.iceberg_context.clone();
74+
async move { Self::job_handler(job_queue_rx, job_queue_tx, action_tx, iceberg_ctx).await }
75+
});
5676

77+
while state_stream.next().await.is_some() {
5778
let new_conn_details = {
5879
let state = self.state_ref.read().unwrap();
5980

@@ -64,36 +85,27 @@ impl IcebergContextManager {
6485
TanicIcebergState::Exiting => {
6586
break;
6687
}
67-
_ => None
88+
_ => None,
6889
}
6990
};
7091

7192
if let Some(new_conn_details) = new_conn_details {
72-
self.connect_to(&new_conn_details).await?;
73-
74-
let namespaces = {
75-
self.iceberg_context.read().unwrap().namespaces.clone()
76-
};
93+
self.connect_to(&new_conn_details, job_queue_tx.clone())
94+
.await?;
7795

7896
// begin crawl
79-
for namespace in namespaces {
80-
81-
// spawn a task to start populating the namespaces
82-
let action_tx = self.action_tx.clone();
83-
let ctx = self.iceberg_context.clone();
84-
85-
// TODO: handle handle, lol
86-
let _jh = tokio::spawn(async move {
87-
Self::populate_tables(ctx, action_tx, namespace).await
88-
});
89-
}
97+
let _ = job_queue_tx.send(IcebergTask::Namespaces).await;
9098
}
9199
}
92100

93101
Ok(())
94102
}
95103

96-
async fn connect_to(&self, new_conn_details: &ConnectionDetails) -> Result<()> {
104+
async fn connect_to(
105+
&self,
106+
new_conn_details: &ConnectionDetails,
107+
_job_queue_tx: MpscSender<IcebergTask>,
108+
) -> Result<()> {
97109
{
98110
let ctx = self.iceberg_context.read().unwrap();
99111
if let Some(ref existing_conn_details) = ctx.connection_details {
@@ -104,31 +116,19 @@ impl IcebergContextManager {
104116
}
105117
}
106118

107-
// cancel any in-progress action and connect to the new connection
108119
{
109120
let mut ctx = self.iceberg_context.write().unwrap();
110-
// TODO: cancel in-prog action
111-
// if let Some(cancellable) = *ctx.deref_mut().cancellable_action {
112-
// cancellable.abort();
113-
// }
114121
ctx.connect_to(new_conn_details);
115122
}
116123

117-
// spawn a task to start populating the namespaces
118-
let action_tx = self.action_tx.clone();
119-
let ctx = self.iceberg_context.clone();
120-
// TODO: store the join handle for cancellation
121-
let _jh = tokio::spawn(async move {
122-
let res = Self::populate_namespaces(ctx, action_tx).await;
123-
if let Err(error) = res {
124-
tracing::error!(%error, "Error populating namespaces");
125-
}
126-
});
127-
128124
Ok(())
129125
}
130126

131-
async fn populate_namespaces(ctx: IceCtxRef, action_tx: ActionTx) -> Result<()> {
127+
async fn populate_namespaces(
128+
ctx: IceCtxRef,
129+
action_tx: ActionTx,
130+
job_queue_tx: MpscSender<IcebergTask>,
131+
) -> Result<()> {
132132
let root_namespaces = {
133133
let catalog = {
134134
let r_ctx = ctx.read().unwrap();
@@ -157,23 +157,30 @@ impl IcebergContextManager {
157157

158158
action_tx
159159
.send(TanicAction::UpdateNamespacesList(
160-
namespaces.iter().map(|ns| {
161-
ns.name.clone()
162-
}).collect::<Vec<_>>()
160+
namespaces
161+
.iter()
162+
.map(|ns| ns.name.clone())
163+
.collect::<Vec<_>>(),
163164
))
164165
.map_err(|err| TanicError::UnexpectedError(err.to_string()))?;
165166

167+
for namespace in namespaces {
168+
let _ = job_queue_tx
169+
.send(IcebergTask::TablesForNamespace(namespace.clone()))
170+
.await;
171+
}
172+
166173
Ok(())
167174
}
168175

169176
async fn populate_tables(
170177
ctx: IceCtxRef,
171178
action_tx: ActionTx,
172179
namespace: NamespaceDeets,
180+
job_queue_tx: MpscSender<IcebergTask>,
173181
) -> Result<()> {
174182
let namespace_ident = NamespaceIdent::from_strs(namespace.parts.clone())?;
175183
let tables = {
176-
177184
let catalog = {
178185
let r_ctx = ctx.read().unwrap();
179186

@@ -206,10 +213,59 @@ impl IcebergContextManager {
206213
action_tx
207214
.send(TanicAction::UpdateNamespaceTableList(
208215
namespace.name.clone(),
209-
tables.iter().map(|t|&t.name).cloned().collect(),
216+
tables.iter().map(|t| &t.name).cloned().collect(),
210217
))
211218
.map_err(TanicError::unexpected)?;
212219

220+
for table in tables {
221+
let _ = job_queue_tx
222+
.send(IcebergTask::SummaryForTable(table.clone()))
223+
.await;
224+
}
225+
226+
Ok(())
227+
}
228+
229+
async fn populate_table_summary(
230+
ctx: IceCtxRef,
231+
action_tx: ActionTx,
232+
table: TableDeets,
233+
_job_queue_tx: MpscSender<IcebergTask>,
234+
) -> Result<()> {
235+
let namespace_ident = NamespaceIdent::from_strs(table.namespace.clone())?;
236+
let table_ident = TableIdent::new(namespace_ident.clone(), table.name.clone());
237+
238+
let loaded_table = {
239+
let catalog = {
240+
let r_ctx = ctx.read().unwrap();
241+
242+
let Some(ref catalog) = r_ctx.catalog else {
243+
return Err(TanicError::unexpected(
244+
"Attempted to populate table summary when catalog not initialised",
245+
));
246+
};
247+
248+
catalog.clone()
249+
};
250+
251+
catalog.load_table(&table_ident).await?
252+
};
253+
254+
let summary = loaded_table
255+
.metadata()
256+
.current_snapshot()
257+
.unwrap()
258+
.summary();
259+
tracing::info!(?summary);
260+
261+
action_tx
262+
.send(TanicAction::UpdateTableSummary {
263+
namespace: namespace_ident.to_url_string(),
264+
table_name: table_ident.name.clone(),
265+
table_summary: summary.additional_properties.clone(),
266+
})
267+
.map_err(TanicError::unexpected)?;
268+
213269
Ok(())
214270
}
215271
}
@@ -229,3 +285,62 @@ impl IcebergContext {
229285
self.tables = vec![];
230286
}
231287
}
288+
289+
impl IcebergContextManager {
290+
async fn job_handler(
291+
job_queue_rx: MpscReceiver<IcebergTask>,
292+
job_queue_tx: MpscSender<IcebergTask>,
293+
action_tx: ActionTx,
294+
iceberg_ctx: IceCtxRef,
295+
) {
296+
let job_stream = ReceiverStream::new(job_queue_rx);
297+
298+
// let _ = tokio::spawn(async move {
299+
job_stream
300+
.map(|task| {
301+
(
302+
task,
303+
iceberg_ctx.clone(),
304+
action_tx.clone(),
305+
job_queue_tx.clone(),
306+
)
307+
})
308+
.for_each_concurrent(
309+
JOB_STREAM_CONCURRENCY,
310+
async move |(task, iceberg_ctx, action_tx, job_queue_tx)| {
311+
match task {
312+
IcebergTask::Namespaces => {
313+
let _ = IcebergContextManager::populate_namespaces(
314+
iceberg_ctx,
315+
action_tx,
316+
job_queue_tx,
317+
)
318+
.await;
319+
}
320+
321+
IcebergTask::TablesForNamespace(namespace) => {
322+
let _ = IcebergContextManager::populate_tables(
323+
iceberg_ctx,
324+
action_tx,
325+
namespace,
326+
job_queue_tx,
327+
)
328+
.await;
329+
}
330+
331+
IcebergTask::SummaryForTable(table) => {
332+
let _ = IcebergContextManager::populate_table_summary(
333+
iceberg_ctx,
334+
action_tx,
335+
table,
336+
job_queue_tx,
337+
)
338+
.await;
339+
} // _ => {}
340+
}
341+
},
342+
)
343+
.await;
344+
// }).await;
345+
}
346+
}

tanic-svc/src/lib.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ use tokio::sync::watch::{Receiver as WatchReceiver, Sender as WatchSender};
88
pub mod iceberg_context;
99
pub mod state;
1010

11-
pub use state::{TanicAction, TanicAppState};
1211
use crate::state::TanicIcebergState;
12+
pub use state::{TanicAction, TanicAppState};
1313

1414
pub struct AppStateManager {
1515
action_rx: MpscReceiver<TanicAction>,
@@ -22,9 +22,7 @@ pub struct AppStateManager {
2222
}
2323

2424
impl AppStateManager {
25-
pub fn new(
26-
_config: TanicConfig,
27-
) -> (Self, MpscSender<TanicAction>, WatchReceiver<()>) {
25+
pub fn new(_config: TanicConfig) -> (Self, MpscSender<TanicAction>, WatchReceiver<()>) {
2826
let state = Arc::new(RwLock::new(TanicAppState::default()));
2927

3028
let (action_tx, action_rx) = tokio::sync::mpsc::unbounded_channel();

0 commit comments

Comments
 (0)