|
1 | 1 | /*
|
2 |
| - * Copyright 2022-2025 Crown Copyright |
3 |
| - * |
4 |
| - * Licensed under the Apache License, Version 2.0 (the "License"); |
5 |
| - * you may not use this file except in compliance with the License. |
6 |
| - * You may obtain a copy of the License at |
7 |
| - * |
8 |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
9 |
| - * |
10 |
| - * Unless required by applicable law or agreed to in writing, software |
11 |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
12 |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 |
| - * See the License for the specific language governing permissions and |
14 |
| - * limitations under the License. |
15 |
| - */ |
| 2 | +* Copyright 2022-2025 Crown Copyright |
| 3 | +* |
| 4 | +* Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +* you may not use this file except in compliance with the License. |
| 6 | +* You may obtain a copy of the License at |
| 7 | +* |
| 8 | +* http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +* |
| 10 | +* Unless required by applicable law or agreed to in writing, software |
| 11 | +* distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +* See the License for the specific language governing permissions and |
| 14 | +* limitations under the License. |
| 15 | +*/ |
| 16 | +use apps::path_absolute; |
16 | 17 | use chrono::Local;
|
17 | 18 | use clap::Parser;
|
18 | 19 | use color_eyre::eyre::bail;
|
19 | 20 | use human_panic::setup_panic;
|
20 | 21 | use log::info;
|
21 | 22 | use num_format::{Locale, ToFormattedString};
|
22 | 23 | use sleeper_core::{
|
23 |
| - ColRange, CommonConfig, OperationOutput, PartitionBound, SleeperParquetOptions, |
| 24 | + ColRange, CommonConfigBuilder, OutputType, PartitionBound, SleeperParquetOptions, |
24 | 25 | SleeperPartitionRegion, run_compaction,
|
25 | 26 | };
|
26 |
| -use std::{collections::HashMap, io::Write, path::Path}; |
| 27 | +use std::{collections::HashMap, io::Write}; |
27 | 28 | use url::Url;
|
28 | 29 |
|
29 |
| -/// Implements a Sleeper compaction algorithm in Rust. |
| 30 | +/// Runs a Sleeper compaction algorithm. |
30 | 31 | ///
|
31 | 32 | /// A sequence of Parquet files is read and compacted into a single output Parquet file. The input
|
32 | 33 | /// files must be individually sorted according to the row key columns and then the sort columns. A sketches file containing
|
@@ -63,16 +64,6 @@ struct CmdLineArgs {
|
63 | 64 | iterator_config: Option<String>,
|
64 | 65 | }
|
65 | 66 |
|
66 |
| -/// Converts a [`Path`] reference to an absolute path (if not already absolute) |
67 |
| -/// and returns it as a String. |
68 |
| -/// |
69 |
| -/// # Panics |
70 |
| -/// If the path can't be made absolute due to not being able to get the current |
71 |
| -/// directory or the path is not valid. |
72 |
| -fn path_absolute<T: ?Sized + AsRef<Path>>(path: &T) -> String { |
73 |
| - std::path::absolute(path).unwrap().to_str().unwrap().into() |
74 |
| -} |
75 |
| - |
76 | 67 | #[tokio::main(flavor = "multi_thread")]
|
77 | 68 | async fn main() -> color_eyre::Result<()> {
|
78 | 69 | // Install coloured errors
|
@@ -149,79 +140,26 @@ async fn main() -> color_eyre::Result<()> {
|
149 | 140 | dict_enc_values: true,
|
150 | 141 | };
|
151 | 142 |
|
152 |
| - let details = CommonConfig { |
153 |
| - aws_config: None, |
154 |
| - input_files: input_urls, |
155 |
| - input_files_sorted: true, |
156 |
| - row_key_cols: args.row_keys, |
157 |
| - sort_key_cols: args.sort_keys, |
158 |
| - region: SleeperPartitionRegion::new(map), |
159 |
| - output: OperationOutput::File { |
| 143 | + let details = CommonConfigBuilder::new() |
| 144 | + .aws_config(None) |
| 145 | + .input_files(input_urls) |
| 146 | + .input_files_sorted(true) |
| 147 | + .row_key_cols(args.row_keys) |
| 148 | + .sort_key_cols(args.sort_keys) |
| 149 | + .region(SleeperPartitionRegion::new(map)) |
| 150 | + .output(OutputType::File { |
160 | 151 | output_file,
|
161 | 152 | opts: parquet_options,
|
162 |
| - }, |
163 |
| - iterator_config: args.iterator_config, |
164 |
| - }; |
165 |
| - |
166 |
| - let result = run_compaction(&details).await; |
167 |
| - match result { |
168 |
| - Ok(r) => { |
169 |
| - info!( |
170 |
| - "Compaction read {} rows and wrote {} rows", |
171 |
| - r.rows_read.to_formatted_string(&Locale::en), |
172 |
| - r.rows_written.to_formatted_string(&Locale::en) |
173 |
| - ); |
174 |
| - } |
175 |
| - Err(e) => { |
176 |
| - bail!(e); |
177 |
| - } |
178 |
| - } |
179 |
| - Ok(()) |
180 |
| -} |
181 |
| - |
182 |
| -#[cfg(test)] |
183 |
| -mod path_test { |
184 |
| - use crate::path_absolute; |
185 |
| - |
186 |
| - #[cfg(not(any(target_os = "windows", target_os = "macos")))] |
187 |
| - fn cd_to_tmp() { |
188 |
| - std::env::set_current_dir("/tmp").unwrap(); |
189 |
| - } |
190 |
| - |
191 |
| - #[test] |
192 |
| - #[cfg(not(any(target_os = "windows", target_os = "macos")))] |
193 |
| - fn relative_path_converts() { |
194 |
| - cd_to_tmp(); |
195 |
| - assert_eq!("/tmp/foo/bar/baz.txt", path_absolute("foo/bar/baz.txt")); |
196 |
| - } |
197 |
| - |
198 |
| - #[test] |
199 |
| - #[cfg(not(any(target_os = "windows", target_os = "macos")))] |
200 |
| - fn relative_path_converts_with_one_dot() { |
201 |
| - cd_to_tmp(); |
202 |
| - assert_eq!("/tmp/foo/bar/baz.txt", path_absolute("./foo/bar/baz.txt")); |
203 |
| - } |
204 |
| - |
205 |
| - #[test] |
206 |
| - #[cfg(not(any(target_os = "windows", target_os = "macos")))] |
207 |
| - fn relative_path_converts_with_double_dot() { |
208 |
| - cd_to_tmp(); |
209 |
| - assert_eq!( |
210 |
| - "/tmp/../foo/bar/baz.txt", |
211 |
| - path_absolute("../foo/bar/baz.txt") |
212 |
| - ); |
213 |
| - } |
| 153 | + }) |
| 154 | + .iterator_config(args.iterator_config) |
| 155 | + .build()?; |
214 | 156 |
|
215 |
| - #[test] |
216 |
| - #[cfg(not(any(target_os = "windows", target_os = "macos")))] |
217 |
| - fn absolute_path_unchanged() { |
218 |
| - cd_to_tmp(); |
219 |
| - assert_eq!("/tmp/foo/bar", path_absolute("/tmp/foo/bar")); |
220 |
| - } |
| 157 | + let result = run_compaction(&details).await?; |
| 158 | + info!( |
| 159 | + "Compaction read {} rows and wrote {} rows", |
| 160 | + result.rows_read.to_formatted_string(&Locale::en), |
| 161 | + result.rows_written.to_formatted_string(&Locale::en) |
| 162 | + ); |
221 | 163 |
|
222 |
| - #[test] |
223 |
| - #[should_panic(expected = "cannot make an empty path absolute")] |
224 |
| - fn empty_path_panic() { |
225 |
| - let _ = path_absolute(""); |
226 |
| - } |
| 164 | + Ok(()) |
227 | 165 | }
|
0 commit comments