diff --git a/Cargo.lock b/Cargo.lock index 7a9bb807..1b8f7232 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -172,11 +172,22 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bincode" -version = "1.3.3" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" dependencies = [ + "bincode_derive", "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", ] [[package]] @@ -892,6 +903,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "gpqa" +version = "0.32.0" +dependencies = [ + "anyhow", + "bincode", + "clap", + "egobox-ego", + "egobox-moe", + "rayon", +] + [[package]] name = "half" version = "2.6.0" @@ -2349,6 +2372,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2361,6 +2390,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 621d09f5..af846fa0 100644 --- a/Cargo.toml +++ b/Cargo.toml 
@@ -52,6 +52,9 @@ serde_json = "1" ctrlc = "3.4" +clap = { version = "4", features = ["derive"] } +bincode = { version = "2", features = ["serde"] } + # dev dependencies criterion = "0.5" approx = "0.4" diff --git a/crates/ego/Cargo.toml b/crates/ego/Cargo.toml index 2dfe538a..a3ced730 100644 --- a/crates/ego/Cargo.toml +++ b/crates/ego/Cargo.toml @@ -11,7 +11,7 @@ keywords = ["machine-learning", "bayesian", "optimization"] categories = ["algorithms", "mathematics", "science"] [features] -default = [] +default = ["persistent"] persistent = ["egobox-moe/persistent"] blas = ["ndarray-linalg", "linfa/ndarray-linalg", "linfa-pls/blas"] @@ -43,7 +43,7 @@ nlopt = { version = "0.8", optional = true } rand_xoshiro = { version = "0.6", features = ["serde1"] } argmin = { version = "0.10.0", features = ["serde1", "ctrlc"] } -bincode = { version = "1.3.0" } +bincode.workspace = true web-time = "1.1.0" libm = "0.2.6" finitediff.workspace = true @@ -53,7 +53,7 @@ log.workspace = true env_logger.workspace = true thiserror.workspace = true anyhow.workspace = true -clap = { version = "4", features = ["derive"] } +clap.workspace = true serde = { version = "1", features = ["derive", "rc"] } serde_json.workspace = true diff --git a/crates/ego/examples/g24.rs b/crates/ego/examples/g24.rs index a93b9af5..51c4f328 100644 --- a/crates/ego/examples/g24.rs +++ b/crates/ego/examples/g24.rs @@ -1,6 +1,6 @@ use egobox_doe::{Lhs, SamplingMethod}; -use egobox_ego::{Cstr, EgorServiceFactory}; -use ndarray::{Array2, ArrayBase, ArrayView2, Axis, Data, Ix1, Zip, array, concatenate}; +use egobox_ego::{EgorBuilder, InfillOptimizer}; +use ndarray::{Array2, ArrayBase, ArrayView2, Data, Ix1, Zip, array}; // Objective fn g24(x: &ArrayBase, Ix1>) -> f64 { @@ -30,21 +30,19 @@ fn f_g24(x: &ArrayView2) -> Array2 { fn main() { let xlimits = array![[0., 3.], [0., 4.]]; - let mut doe = Lhs::new(&xlimits).sample(3); - - // We use Egor optimizer as a service - let egor = EgorServiceFactory::::optimize() - 
.configure(|config| config.n_cstr(2).seed(42)) - .min_within(&xlimits); - - let mut y_doe = f_g24(&doe.view()); - for _i in 0..10 { - // We tell function values and ask for next x location - let x_suggested = egor.suggest(&doe, &y_doe); - - doe = concatenate![Axis(0), doe, x_suggested]; - y_doe = f_g24(&doe.view()); - } - - println!("G24 optim x suggestion history = {doe:?}"); + let doe = Lhs::new(&xlimits).sample(3); + + let res = EgorBuilder::optimize(f_g24) + .configure(|config| { + config + .n_cstr(2) + .doe(&doe) + .max_iters(100) + .infill_optimizer(InfillOptimizer::Cobyla) + .seed(42) + }) + .min_within(&xlimits) + .run() + .expect("Minimize failure"); + println!("G24 optim result = {}", res.y_opt); } diff --git a/crates/ego/examples/g24_suggest.rs b/crates/ego/examples/g24_suggest.rs new file mode 100644 index 00000000..a93b9af5 --- /dev/null +++ b/crates/ego/examples/g24_suggest.rs @@ -0,0 +1,50 @@ +use egobox_doe::{Lhs, SamplingMethod}; +use egobox_ego::{Cstr, EgorServiceFactory}; +use ndarray::{Array2, ArrayBase, ArrayView2, Axis, Data, Ix1, Zip, array, concatenate}; + +// Objective +fn g24(x: &ArrayBase, Ix1>) -> f64 { + // Function G24: 1 global optimum y_opt = -5.5080 at x_opt =(2.3295, 3.1785) + -x[0] - x[1] +} + +// Constraints < 0 +fn g24_c1(x: &ArrayBase, Ix1>) -> f64 { + -2.0 * x[0].powf(4.0) + 8.0 * x[0].powf(3.0) - 8.0 * x[0].powf(2.0) + x[1] - 2.0 +} + +fn g24_c2(x: &ArrayBase, Ix1>) -> f64 { + -4.0 * x[0].powf(4.0) + 32.0 * x[0].powf(3.0) - 88.0 * x[0].powf(2.0) + 96.0 * x[0] + x[1] + - 36.0 +} + +fn f_g24(x: &ArrayView2) -> Array2 { + let mut y = Array2::zeros((x.nrows(), 3)); + Zip::from(y.rows_mut()) + .and(x.rows()) + .for_each(|mut yi, xi| { + yi.assign(&array![g24(&xi), g24_c1(&xi), g24_c2(&xi)]); + }); + y +} + +fn main() { + let xlimits = array![[0., 3.], [0., 4.]]; + let mut doe = Lhs::new(&xlimits).sample(3); + + // We use Egor optimizer as a service + let egor = EgorServiceFactory::::optimize() + .configure(|config| 
config.n_cstr(2).seed(42)) + .min_within(&xlimits); + + let mut y_doe = f_g24(&doe.view()); + for _i in 0..10 { + // We tell function values and ask for next x location + let x_suggested = egor.suggest(&doe, &y_doe); + + doe = concatenate![Axis(0), doe, x_suggested]; + y_doe = f_g24(&doe.view()); + } + + println!("G24 optim x suggestion history = {doe:?}"); +} diff --git a/crates/ego/examples/mopta08.rs b/crates/ego/examples/mopta08.rs index 0d9e6495..d7ae14f8 100644 --- a/crates/ego/examples/mopta08.rs +++ b/crates/ego/examples/mopta08.rs @@ -265,7 +265,7 @@ fn main() -> anyhow::Result<()> { let mut xlimits = Array2::zeros((dim, 2)); xlimits.column_mut(1).assign(&Array1::ones(dim)); - let res = if std::env::var(egobox_ego::EGOBOX_USE_GP_VAR_PORTFOLIO).is_ok() { + let res = if std::env::var(egobox_ego::EGOR_USE_GP_VAR_PORTFOLIO).is_ok() { EgorBuilder::optimize(mopta_func(dim)) .configure(|config| { config diff --git a/crates/ego/src/criteria/mod.rs b/crates/ego/src/criteria/mod.rs index 48671f06..c0078765 100644 --- a/crates/ego/src/criteria/mod.rs +++ b/crates/ego/src/criteria/mod.rs @@ -13,7 +13,7 @@ use ndarray::{Array1, ArrayView2}; /// determine the next most promising point expected to be the /// optimum location of the objective function #[clonable] -#[typetag::serde(tag = "type")] +#[typetag::serde(tag = "type_infill")] pub trait InfillCriterion: Clone + Sync { /// Name of the infill criterion fn name(&self) -> &'static str; diff --git a/crates/ego/src/egor.rs b/crates/ego/src/egor.rs index 2585f67e..fb64519a 100644 --- a/crates/ego/src/egor.rs +++ b/crates/ego/src/egor.rs @@ -907,6 +907,7 @@ mod tests { let doe = Lhs::new(&xlimits) .with_rng(Xoshiro256Plus::seed_from_u64(42)) .sample(10); + let q = 2; let res = EgorBuilder::optimize(f_g24) .configure(|config| { config @@ -916,7 +917,7 @@ mod tests { }) .n_cstr(2) .cstr_tol(array![2e-6, 2e-6]) - .q_points(2) + .q_points(q) .qei_strategy(QEiStrategy::KrigingBeliever) .doe(&doe) .target(-5.5030) @@ -926,6 
+927,7 @@ mod tests { .min_within(&xlimits) .run() .expect("Egor minimization"); + assert_eq!(res.x_doe.nrows(), doe.nrows() + q * res.state.iter as usize); println!("G24 optim result = {res:?}"); let expected = array![2.3295, 3.1785]; assert_abs_diff_eq!(expected, res.x_opt, epsilon = 2e-2); diff --git a/crates/ego/src/errors.rs b/crates/ego/src/errors.rs index f9b91d9d..5fff7935 100644 --- a/crates/ego/src/errors.rs +++ b/crates/ego/src/errors.rs @@ -38,4 +38,16 @@ pub enum EgoError { /// When global EGO step cannot add any point #[error("EGO exit (no more point)")] NoMorePointToAddError(Box>), + /// When error during saving + #[cfg(feature = "persistent")] + #[error("Save error: {0}")] + SaveBinaryError(#[from] bincode::error::EncodeError), + /// When error during loading + #[cfg(feature = "persistent")] + #[error("Load error: {0}")] + LoadBinaryError(#[from] bincode::error::DecodeError), + /// When error during saving + #[cfg(feature = "persistent")] + #[error("Save error: {0}")] + JsonError(#[from] serde_json::Error), } diff --git a/crates/ego/src/gpmix/mixint.rs b/crates/ego/src/gpmix/mixint.rs index 1ad46701..def36d14 100644 --- a/crates/ego/src/gpmix/mixint.rs +++ b/crates/ego/src/gpmix/mixint.rs @@ -7,10 +7,10 @@ use crate::errors::{EgoError, Result}; use crate::types::{SurrogateBuilder, XType}; use egobox_doe::{FullFactorial, Lhs, LhsKind, Random}; use egobox_gp::ThetaTuning; -use egobox_gp::metrics::CrossValScore; use egobox_moe::{ Clustered, Clustering, CorrelationSpec, FullGpSurrogate, GpMixture, GpMixtureParams, - GpSurrogate, GpSurrogateExt, MixtureGpSurrogate, NbClusters, Recombination, RegressionSpec, + GpQualityAssurance, GpScore, GpSurrogate, GpSurrogateExt, MixtureGpSurrogate, NbClusters, + Recombination, RegressionSpec, }; use linfa::traits::{Fit, PredictInplace}; use linfa::{DatasetBase, Float, ParamGuard}; @@ -612,13 +612,32 @@ impl GpSurrogate for MixintGpMixture { let mut file = fs::File::create(path).unwrap(); let bytes = match format { 
GpFileFormat::Json => serde_json::to_vec(self).map_err(MoeError::SaveJsonError)?, - GpFileFormat::Binary => bincode::serialize(self).map_err(MoeError::SaveBinaryError)?, + GpFileFormat::Binary => { + bincode::serde::encode_to_vec(self, bincode::config::standard()) + .map_err(MoeError::SaveBinaryError)? + } }; file.write_all(&bytes)?; Ok(()) } } +impl MixintGpMixture { + /// Load MixintGpMixture from given file. + #[cfg(feature = "persistent")] + pub fn load(path: &str, format: GpFileFormat) -> Result> { + let data = fs::read(path)?; + let moe = match format { + GpFileFormat::Json => serde_json::from_slice(&data).unwrap(), + GpFileFormat::Binary => { + bincode::serde::decode_from_slice(&data, bincode::config::standard()) + .map(|(surrogate, _)| surrogate)? + } + }; + Ok(Box::new(moe)) + } +} + #[typetag::serde] impl GpSurrogateExt for MixintGpMixture { fn predict_gradients(&self, x: &ArrayView2) -> egobox_moe::Result> { @@ -652,16 +671,40 @@ impl GpSurrogateExt for MixintGpMixture { } } -impl CrossValScore for MixintGpMixture { +impl GpScore for MixintGpMixture { + fn params(&self) -> MixintGpMixtureParams { + self.params.clone().into() + } + fn training_data(&self) -> &(Array2, Array1) { &self.training_data } +} - fn params(&self) -> MixintGpMixtureParams { - MixintGpMixtureParams::from(self.params.clone()) +#[typetag::serde] +impl GpQualityAssurance for MixintGpMixture { + fn training_data(&self) -> &(Array2, Array1) { + (self as &dyn GpScore<_, _, _>).training_data() + } + + fn q2(&self, kfold: usize) -> f64 { + (self as &dyn GpScore<_, _, _>).q2_score(kfold) + } + + fn looq2(&self) -> f64 { + (self as &dyn GpScore<_, _, _>).looq2_score() + } + + fn pva(&self, kfold: usize) -> f64 { + (self as &dyn GpScore<_, _, _>).pva_score(kfold) + } + + fn loopva(&self) -> f64 { + (self as &dyn GpScore<_, _, _>).loopva_score() } } +#[typetag::serde] impl MixtureGpSurrogate for MixintGpMixture { fn experts(&self) -> &Vec> { self.moe.experts() diff --git a/crates/ego/src/lib.rs 
b/crates/ego/src/lib.rs index d798fb86..7e0ebb5f 100644 --- a/crates/ego/src/lib.rs +++ b/crates/ego/src/lib.rs @@ -300,8 +300,9 @@ pub use crate::gpmix::spec::{CorrelationSpec, RegressionSpec}; pub use crate::solver::*; pub use crate::types::*; pub use crate::utils::{ - CHECKPOINT_FILE, Checkpoint, CheckpointingFrequency, EGOBOX_LOG, EGOBOX_USE_GP_VAR_PORTFOLIO, - EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY, HotStartCheckpoint, HotStartMode, find_best_result_index, + CHECKPOINT_FILE, Checkpoint, CheckpointingFrequency, EGOBOX_LOG, EGOR_GP_FILENAME, + EGOR_INITIAL_GP_FILENAME, EGOR_USE_GP_RECORDER, EGOR_USE_GP_VAR_PORTFOLIO, + EGOR_USE_MAX_PROBA_OF_FEASIBILITY, HotStartCheckpoint, HotStartMode, find_best_result_index, }; mod optimizers; diff --git a/crates/ego/src/solver/egor_solver.rs b/crates/ego/src/solver/egor_solver.rs index 84878790..39de4e07 100644 --- a/crates/ego/src/solver/egor_solver.rs +++ b/crates/ego/src/solver/egor_solver.rs @@ -105,7 +105,7 @@ //! ``` //! use crate::utils::{ - EGOBOX_LOG, EGOBOX_USE_GP_VAR_PORTFOLIO, EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY, + EGOBOX_LOG, EGOR_USE_GP_VAR_PORTFOLIO, EGOR_USE_MAX_PROBA_OF_FEASIBILITY, find_best_result_index, is_feasible, }; use crate::{EgoError, EgorConfig, EgorState, MAX_POINT_ADDITION_RETRY}; @@ -273,15 +273,14 @@ where // Use proba of feasibility require related env var to be defined // (err to get var means not defined, means feasability is set to true whatever, // means given infill criterion is used whatever) - initial_state.feasibility = std::env::var(EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY).is_err() - || { - is_feasible( - &y_data.row(best_index), - &c_data.row(best_index), - &initial_state.cstr_tol, - ) - }; - if std::env::var(EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY).is_ok() { + initial_state.feasibility = std::env::var(EGOR_USE_MAX_PROBA_OF_FEASIBILITY).is_err() || { + is_feasible( + &y_data.row(best_index), + &c_data.row(best_index), + &initial_state.cstr_tol, + ) + }; + if 
std::env::var(EGOR_USE_MAX_PROBA_OF_FEASIBILITY).is_ok() { info!("Using max proba of feasibility for infill criterion"); info!( "Initial best point feasibility = {}", @@ -294,13 +293,13 @@ where info!("{} set: {}", EGOBOX_LOG, std::env::var(EGOBOX_LOG).is_ok()); info!( "{} set: {}", - EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY, - std::env::var(EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY).is_ok() + EGOR_USE_MAX_PROBA_OF_FEASIBILITY, + std::env::var(EGOR_USE_MAX_PROBA_OF_FEASIBILITY).is_ok() ); info!( "{} set: {}", - EGOBOX_USE_GP_VAR_PORTFOLIO, - std::env::var(EGOBOX_USE_GP_VAR_PORTFOLIO).is_ok() + EGOR_USE_GP_VAR_PORTFOLIO, + std::env::var(EGOR_USE_GP_VAR_PORTFOLIO).is_ok() ); Ok((initial_state, None)) } @@ -445,7 +444,9 @@ where } let is_global_phase = (last_iter_success && state.prev_step_ego) - || ((state.get_iter() % (1 + self.config.trego.n_local_steps)) == 0); + || state + .get_iter() + .is_multiple_of(1 + self.config.trego.n_local_steps); if is_global_phase { // Global step diff --git a/crates/ego/src/solver/solver_impl.rs b/crates/ego/src/solver/solver_impl.rs index 96822733..081cb3c7 100644 --- a/crates/ego/src/solver/solver_impl.rs +++ b/crates/ego/src/solver/solver_impl.rs @@ -4,15 +4,9 @@ use crate::errors::{EgoError, Result}; use crate::gpmix::mixint::{as_continuous_limits, to_discrete_space}; use crate::solver::solver_computations::MiddlePickerMultiStarter; use crate::solver::solver_infill_optim::InfillOptProblem; -//use crate::utils::gp_recorder; use crate::utils::{ - //EGOBOX_GP_RECORDER, - EGOBOX_LOG, - EGOBOX_USE_GP_VAR_PORTFOLIO, - find_best_result_index_from, - is_feasible, - select_from_portfolio, - update_data, + EGOBOX_LOG, EGOR_USE_GP_VAR_PORTFOLIO, find_best_result_index_from, is_feasible, + select_from_portfolio, update_data, }; use crate::{DEFAULT_CSTR_TOL, EgorSolver, MAX_POINT_ADDITION_RETRY}; use crate::{EgorConfig, find_best_result_index}; @@ -114,7 +108,7 @@ where { pub fn have_to_recluster(&self, added: usize, prev_added: usize) -> bool { 
self.config.gp.n_clusters.is_auto() - && (added != 0 && added % 10 == 0 && added - prev_added > 0) + && (added != 0 && added.is_multiple_of(10) && added - prev_added > 0) } /// Build surrogate given training data and surrogate builder @@ -584,7 +578,7 @@ where let mut portfolio = vec![]; let sigma_weights = - if std::env::var(EGOBOX_USE_GP_VAR_PORTFOLIO).is_ok() && self.config.q_points == 1 { + if std::env::var(EGOR_USE_GP_VAR_PORTFOLIO).is_ok() && self.config.q_points == 1 { // Do not believe GP variance, weight it to generate possibly several clusters // hence several points to add // logspace(0.1, 100., 13) with 1. moved in front @@ -639,9 +633,9 @@ where format!("Constraint[{k}]") }; let make_clustering = (init && i == 0) || recluster; - let optimize_theta = - ((iter as usize * self.config.q_points + i) % (self.config.q_optmod) == 0) - && j == 0; + let optimize_theta = (iter as usize * self.config.q_points + i) + .is_multiple_of(self.config.q_optmod) + && j == 0; self.make_clustered_surrogate( &name, &xt, @@ -654,9 +648,23 @@ where ) }); let (models, inits): (Vec<_>, Vec<_>) = models_and_inits.unzip(); - // if std::env::var(EGOBOX_GP_RECORDER).is_ok() { - // gp_recorder::save_gp_models(&models); - // } + #[cfg(feature = "persistent")] + if std::env::var(crate::EGOR_USE_GP_RECORDER).is_ok() { + use crate::utils::{EGOR_GP_FILENAME, EGOR_INITIAL_GP_FILENAME, gp_recorder}; + + let default_dir = String::from("./"); + let outdir = self.config.outdir.as_ref().unwrap_or(&default_dir); + let filename = if iter == 0 { + EGOR_INITIAL_GP_FILENAME + } else { + EGOR_GP_FILENAME + }; + let filepath = std::path::Path::new(outdir).join(filename); + match gp_recorder::save_gp_models(&filepath, &models) { + Ok(_) => log::info!("GP models saved to {:?}", filepath), + Err(err) => log::info!("Cannot save GP models: {:?}", err), + }; + } (0..=self.config.n_cstr).for_each(|k| { clusterings[k] = Some(models[k].to_clustering()); diff --git a/crates/ego/src/utils/gp_recorder.rs 
b/crates/ego/src/utils/gp_recorder.rs new file mode 100644 index 00000000..48f53155 --- /dev/null +++ b/crates/ego/src/utils/gp_recorder.rs @@ -0,0 +1,20 @@ +use crate::errors::Result; +#[cfg(feature = "persistent")] +use std::fs; +#[cfg(feature = "persistent")] +use std::io::Write; +use std::path::Path; + +/// Save models in a bincode file +pub(crate) fn save_gp_models>( + path: P, + models: &[Box], +) -> Result<()> { + let mut file = fs::File::create(path).unwrap(); + + println!("Saving {} GP models...", models.len()); + let bytes = bincode::serde::encode_to_vec(models, bincode::config::standard())?; + file.write_all(&bytes)?; + + Ok(()) +} diff --git a/crates/ego/src/utils/hot_start.rs b/crates/ego/src/utils/hot_start.rs index 882552a4..62853603 100644 --- a/crates/ego/src/utils/hot_start.rs +++ b/crates/ego/src/utils/hot_start.rs @@ -1,8 +1,7 @@ use argmin::core::Error; pub use argmin::core::checkpointing::{Checkpoint, CheckpointingFrequency}; use serde::{Deserialize, Serialize, de::DeserializeOwned}; -use std::fs::File; -use std::io::{BufReader, BufWriter}; +use std::io::Write; use std::path::PathBuf; use crate::EgorState; @@ -93,8 +92,9 @@ where std::fs::create_dir_all(&self.directory)? 
} let fname = self.directory.join(&self.filename); - let f = BufWriter::new(File::create(fname)?); - bincode::serialize_into(f, &(solver, state))?; + let mut file = std::fs::File::create(fname).unwrap(); + let bytes = bincode::serde::encode_to_vec((solver, state), bincode::config::standard())?; + file.write_all(&bytes)?; Ok(()) } @@ -108,9 +108,11 @@ where if !path.exists() { return Ok(None); } - let file = File::open(path)?; - let reader = BufReader::new(file); - let (solver, mut state): (_, EgorState<_>) = bincode::deserialize_from(reader)?; + let data = std::fs::read(path)?; + let (solver, mut state): (_, EgorState<_>) = + bincode::serde::decode_from_slice(&data, bincode::config::standard()) + .map(|(res, _)| res) + .unwrap(); if let HotStartMode::ExtendedIters(n_iters) = self.mode { state.extend_max_iters(n_iters); } diff --git a/crates/ego/src/utils/mod.rs b/crates/ego/src/utils/mod.rs index 77af0f60..4ad2d911 100644 --- a/crates/ego/src/utils/mod.rs +++ b/crates/ego/src/utils/mod.rs @@ -1,6 +1,7 @@ mod bounds; mod cstr_pof; mod find_result; +pub(crate) mod gp_recorder; mod hot_start; mod logei_helper; mod misc; @@ -21,7 +22,16 @@ pub use start_points::*; pub const EGOBOX_LOG: &str = "EGOBOX_LOG"; /// Env variable to enable the use of PoF as criterion while no feasible point is found -pub const EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY: &str = "EGOBOX_USE_MAX_PROBA_OF_FEASIBILITY"; +pub const EGOR_USE_MAX_PROBA_OF_FEASIBILITY: &str = "EGOR_USE_MAX_PROBA_OF_FEASIBILITY"; /// Env variable to enable the portfolio method used for global infill criterion optimization -pub const EGOBOX_USE_GP_VAR_PORTFOLIO: &str = "EGOBOX_USE_GP_VAR_PORTFOLIO"; +pub const EGOR_USE_GP_VAR_PORTFOLIO: &str = "EGOR_USE_GP_VAR_PORTFOLIO"; + +/// Env variable to trigger GP recording +pub const EGOR_USE_GP_RECORDER: &str = "EGOR_USE_GP_RECORDER"; + +/// Gaussian process filename to save initial GPs built from initial_doe +pub const EGOR_INITIAL_GP_FILENAME: &str = "egor_initial_gp.bin"; + +/// 
Gaussian process filename to save GPs built at the last iteration +pub const EGOR_GP_FILENAME: &str = "egor_gp.bin"; diff --git a/crates/gp/src/algorithm.rs b/crates/gp/src/algorithm.rs index d67c6410..28f50d27 100644 --- a/crates/gp/src/algorithm.rs +++ b/crates/gp/src/algorithm.rs @@ -941,7 +941,7 @@ fn reduced_likelihood( nugget: F, ) -> Result<(F, GpInnerParams)> { // Set up R - let mut r_mx: Array2 = Array2::::eye(x_distances.n_obs).mapv(|v| (v + v * nugget)); + let mut r_mx: Array2 = Array2::::eye(x_distances.n_obs).mapv(|v| v + v * nugget); for (i, ij) in x_distances.d_indices.outer_iter().enumerate() { r_mx[[ij[0], ij[1]]] = rxx[[i, 0]]; r_mx[[ij[1], ij[0]]] = rxx[[i, 0]]; @@ -1012,7 +1012,7 @@ fn reduced_likelihood( nugget: F, ) -> Result<(F, GpInnerParams)> { // Set up R - let mut r_mx: Array2 = Array2::::eye(x_distances.n_obs).mapv(|v| (v + v * nugget)); + let mut r_mx: Array2 = Array2::::eye(x_distances.n_obs).mapv(|v| v + v * nugget); for (i, ij) in x_distances.d_indices.outer_iter().enumerate() { r_mx[[ij[0], ij[1]]] = rxx[[i, 0]]; r_mx[[ij[1], ij[0]]] = rxx[[i, 0]]; diff --git a/crates/gp/src/metrics.rs b/crates/gp/src/metrics.rs index e11c9283..1508842b 100644 --- a/crates/gp/src/metrics.rs +++ b/crates/gp/src/metrics.rs @@ -3,48 +3,53 @@ use linfa::{ Float, ParamGuard, traits::{Fit, Predict, PredictInplace}, }; -use ndarray::{Array1, Array2, ArrayBase, Ix2, OwnedRepr}; +use ndarray::{Array1, Array2}; use crate::{ GaussianProcess, GpError, GpParams, SgpParams, SparseGaussianProcess, correlation_models, mean_models, }; -/// A trait for cross validation score -pub trait CrossValScore +/// A trait for Q2 predictive coefficient cross validation score +pub trait PredictScore where F: Float, ER: std::error::Error + From, P: Fit, Array1, ER, Object = O> + ParamGuard, - O: PredictInplace, Ix2>, Array1>, + O: PredictInplace, Array1>, { fn training_data(&self) -> &(Array2, Array1); fn params(&self) -> P; - /// Compute quality metric based on cross validation 
- fn cv_score(&self, fold: usize) -> F { + /// Compute quality metric Q2 with kfold cross validation + fn q2_score(&self, kfold: usize) -> F { let (xt, yt) = self.training_data(); let dataset = Dataset::new(xt.to_owned(), yt.to_owned()); - let mut error = F::zero(); - for (train, valid) in dataset.fold(fold).into_iter() { + let yt_mean = yt.mean().unwrap(); + // Predictive Residual Sum of Squares + let mut press = F::zero(); + // Total Sum of Squares + let mut tss = F::zero(); + for (train, valid) in dataset.fold(kfold).into_iter() { let params = self.params(); let model: O = params .fit(&train) .expect("cross-validation: sub model fitted"); let pred = model.predict(valid.records()); - error += (valid.targets() - pred).mapv(|v| v * v).sum(); + press += (valid.targets() - pred).mapv(|v| v * v).sum(); + tss += (valid.targets() - yt_mean).mapv(|v| v * v).sum(); } - (error / F::cast(fold)).sqrt() / yt.mean().unwrap() + F::one() - press / tss } - /// Leave one out cross validation - fn loocv_score(&self) -> F { - self.cv_score(self.training_data().0.nrows()) + /// Q2 predictive coefficient with Leave-One-Out Cross-Validation + fn looq2_score(&self) -> F { + self.q2_score(self.training_data().0.nrows()) } } -impl CrossValScore, Self> +impl PredictScore, Self> for GaussianProcess where F: Float, @@ -60,7 +65,7 @@ where } } -impl CrossValScore, Self> for SparseGaussianProcess +impl PredictScore, Self> for SparseGaussianProcess where F: Float, Corr: correlation_models::CorrelationModel, @@ -80,7 +85,7 @@ mod test { use crate::{Inducings, SparseKriging}; use approx::assert_abs_diff_eq; use egobox_doe::{Lhs, SamplingMethod}; - use ndarray::{Array, Array1, Axis, Data, Ix2, Zip, array}; + use ndarray::{Array, Array1, ArrayBase, Axis, Data, Ix2, Zip, array}; use ndarray_rand::RandomExt; use ndarray_rand::rand::SeedableRng; use ndarray_rand::rand_distr::{Normal, Uniform}; @@ -101,7 +106,7 @@ mod test { } #[test] - fn test_cv_gp_griewank() { + fn test_q2_gp_griewank() { let dims = 
[5]; // , 10, 60]; let nts = [100]; // , 300, 500]; let lim = array![[-600., 600.]]; @@ -127,8 +132,8 @@ mod test { .fit(&Dataset::new(xt, yt)) .expect("GP fit error"); - assert_abs_diff_eq!(gp.loocv_score(), 0., epsilon = 1e-2); - assert_abs_diff_eq!(gp.cv_score(10), 0., epsilon = 1e-2); + assert_abs_diff_eq!(gp.looq2_score(), 1., epsilon = 1e-2); + assert_abs_diff_eq!(gp.q2_score(10), 1., epsilon = 1e-2); }); } @@ -151,7 +156,7 @@ mod test { } #[test] - fn test_cv_sgp() { + fn test_q2_sgp() { let mut rng = Xoshiro256Plus::seed_from_u64(42); // Generate training data let nt = 200; @@ -164,7 +169,7 @@ mod test { .fit(&Dataset::new(xt.clone(), yt.clone())) .expect("GP fitted"); - assert_abs_diff_eq!(sgp.loocv_score(), 13.73, epsilon = 3.1); - assert_abs_diff_eq!(sgp.cv_score(10), 62.16, epsilon = 3.); + assert_abs_diff_eq!(sgp.looq2_score(), 1., epsilon = 2e-2); + assert_abs_diff_eq!(sgp.q2_score(10), 1., epsilon = 2e-2); } } diff --git a/crates/gpqa/Cargo.toml b/crates/gpqa/Cargo.toml new file mode 100644 index 00000000..2fbdad67 --- /dev/null +++ b/crates/gpqa/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "gpqa" +authors.workspace = true +version.workspace = true +license.workspace = true +edition.workspace = true +homepage.workspace = true +description = "Utility to assess Egor GPs quality saved using GP recorder" +repository = "https://github.com/relf/egobox/crates/gpqa" +keywords = ["machine-learning", "bayesian", "optimization"] +categories = ["algorithms", "mathematics", "science"] + +[dependencies] +egobox-ego = { version = "0.32", path = "../ego" } +egobox-moe = { version = "0.32", path = "../moe" } +anyhow.workspace = true +clap.workspace = true +bincode.workspace = true +rayon.workspace = true + +[[bin]] +name = "gpqa" diff --git a/crates/gpqa/src/main.rs b/crates/gpqa/src/main.rs new file mode 100644 index 00000000..703ec12c --- /dev/null +++ b/crates/gpqa/src/main.rs @@ -0,0 +1,78 @@ +use anyhow::Result; +use clap::Parser; +use egobox_moe::GpMixture; 
+use egobox_moe::MixtureGpSurrogate; +use rayon::prelude::*; +use std::fs; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Binary GP file generated by Egor optimizer + filename: String, + + /// Use leave one out cross validation procedure + #[arg(short, long, default_value_t = false)] + loo: bool, + + /// Use K folding cross validation procedure + #[arg(short, long, default_value = "10")] + kfold: usize, +} + +fn main() -> Result<()> { + let args = Args::parse(); + + let data: Vec = fs::read(&args.filename)?; + + // Try to load a vector of GP models (Egor optimizer save) + let gp_models: Vec> = + bincode::serde::decode_from_slice(&data, bincode::config::standard()) + .map(|(res, _)| res) + .unwrap_or_default(); + + let gp_models = if gp_models.is_empty() { + // Try to load a single GP model (Gpx save) + let gp: Box = + bincode::serde::decode_from_slice(&data, bincode::config::standard()) + .map(|(res, _)| res)?; + vec![gp as Box] + } else { + gp_models + }; + + println!( + "Loaded {} GP model(s) from {}", + gp_models.len(), + args.filename + ); + + let (xt, _yt) = gp_models.first().unwrap().training_data(); + println!("Training data: {} samples ({}-dim)", xt.nrows(), xt.ncols()); + + println!( + "Computing Q2 and PVA with {}-fold cross-validation...", + if args.loo { xt.nrows() } else { args.kfold } + ); + + let _res: Vec<_> = gp_models + .par_iter() + .enumerate() + .map(|(i, gp)| { + let q2 = if args.loo { + gp.as_ref().looq2() + } else { + gp.as_ref().q2(args.kfold) + }; + + let pva = if args.loo { + gp.as_ref().loopva() + } else { + gp.as_ref().pva(args.kfold) + }; + println!("GP({i}): Q2={q2}, PVA={pva}"); + }) + .collect(); + + Ok(()) +} diff --git a/crates/moe/Cargo.toml b/crates/moe/Cargo.toml index 4cf86c9c..0024ef9b 100644 --- a/crates/moe/Cargo.toml +++ b/crates/moe/Cargo.toml @@ -52,7 +52,7 @@ thiserror.workspace = true serde = { version = "1", features = ["derive"], optional = true } serde_json = { 
version = "1", optional = true } -bincode = { version = "1.3.3", optional = true } +bincode = { workspace = true, optional = true } typetag = { version = "0.2", optional = true } diff --git a/crates/moe/src/algorithm.rs b/crates/moe/src/algorithm.rs index f19d5a87..b53aba8f 100644 --- a/crates/moe/src/algorithm.rs +++ b/crates/moe/src/algorithm.rs @@ -3,11 +3,10 @@ use crate::clustering::{find_best_number_of_clusters, sort_by_cluster}; use crate::errors::MoeError; use crate::errors::Result; use crate::parameters::{GpMixtureParams, GpMixtureValidParams}; -use crate::types::*; +use crate::{GpScore, types::*}; use crate::{GpType, expertise_macros::*}; use crate::{NbClusters, surrogates::*}; -use egobox_gp::metrics::CrossValScore; use egobox_gp::{GaussianProcess, SparseGaussianProcess, correlation_models::*, mean_models::*}; use linfa::dataset::Records; use linfa::traits::{Fit, Predict, PredictInplace}; @@ -478,7 +477,7 @@ impl Clustered for GpMixture { } } -#[cfg_attr(feature = "serializable", typetag::serde)] +#[typetag::serde] impl GpSurrogate for GpMixture { fn dims(&self) -> (usize, usize) { self.experts[0].dims() @@ -497,6 +496,7 @@ impl GpSurrogate for GpMixture { Recombination::Smooth(_) => self.predict_var_smooth(x), } } + /// Save Moe model in given file. #[cfg(feature = "persistent")] fn save(&self, path: &str, format: GpFileFormat) -> Result<()> { @@ -504,7 +504,10 @@ impl GpSurrogate for GpMixture { let bytes = match format { GpFileFormat::Json => serde_json::to_vec(self).map_err(MoeError::SaveJsonError)?, - GpFileFormat::Binary => bincode::serialize(self).map_err(MoeError::SaveBinaryError)?, + GpFileFormat::Binary => { + bincode::serde::encode_to_vec(self, bincode::config::standard()) + .map_err(MoeError::SaveBinaryError)? 
+ } }; file.write_all(&bytes)?; @@ -512,7 +515,7 @@ impl GpSurrogate for GpMixture { } } -#[cfg_attr(feature = "serializable", typetag::serde)] +#[typetag::serde] impl GpSurrogateExt for GpMixture { fn predict_gradients(&self, x: &ArrayView2) -> Result> { match self.recombination { @@ -539,7 +542,7 @@ impl GpSurrogateExt for GpMixture { } } -impl CrossValScore, Self> for GpMixture { +impl GpScore, Self> for GpMixture { fn training_data(&self) -> &(Array2, Array1) { &self.training_data } @@ -549,6 +552,28 @@ impl CrossValScore, Self> for GpMixture { } } +#[typetag::serde] +impl GpQualityAssurance for GpMixture { + fn training_data(&self) -> &(Array2, Array1) { + (self as &dyn GpScore<_, _, _>).training_data() + } + + fn q2(&self, kfold: usize) -> f64 { + (self as &dyn GpScore<_, _, _>).q2_score(kfold) + } + fn looq2(&self) -> f64 { + (self as &dyn GpScore<_, _, _>).looq2_score() + } + + fn pva(&self, kfold: usize) -> f64 { + (self as &dyn GpScore<_, _, _>).pva_score(kfold) + } + fn loopva(&self) -> f64 { + (self as &dyn GpScore<_, _, _>).loopva_score() + } +} + +#[typetag::serde] impl MixtureGpSurrogate for GpMixture { /// Selected experts in the mixture fn experts(&self) -> &Vec> { @@ -873,13 +898,16 @@ impl GpMixture { // error / self.ytrain.std(1.) // } + /// Load Moe from the given file. #[cfg(feature = "persistent")] - /// Load Moe from given json file. pub fn load(path: &str, format: GpFileFormat) -> Result> { let data = fs::read(path)?; let moe = match format { - GpFileFormat::Json => serde_json::from_slice(&data).unwrap(), - GpFileFormat::Binary => bincode::deserialize(&data).unwrap(), + GpFileFormat::Json => serde_json::from_slice(&data)?, + GpFileFormat::Binary => { + bincode::serde::decode_from_slice(&data, bincode::config::standard()) + .map(|(surrogate, _)| surrogate)? 
+ } }; Ok(Box::new(moe)) } @@ -1014,7 +1042,7 @@ mod tests { moe.predict(&array![[0.82]]).unwrap()[0], epsilon = 1e-4 ); - println!("LOOCV = {}", moe.loocv_score()); + println!("LOOQ2 = {}", moe.looq2_score()); } #[test] diff --git a/crates/moe/src/clustering.rs b/crates/moe/src/clustering.rs index 95185806..c334c8cc 100644 --- a/crates/moe/src/clustering.rs +++ b/crates/moe/src/clustering.rs @@ -21,7 +21,7 @@ fn median(v: &[f64]) -> f64 { list.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); let len = list.len(); let mid = len / 2; - if len % 2 == 0 { + if len.is_multiple_of(2) { mean(&list[(mid - 1)..(mid + 1)]) } else { list[mid] diff --git a/crates/moe/src/errors.rs b/crates/moe/src/errors.rs index 1aad2bb6..2eace24b 100644 --- a/crates/moe/src/errors.rs +++ b/crates/moe/src/errors.rs @@ -34,7 +34,11 @@ pub enum MoeError { /// When error during saving #[cfg(feature = "persistent")] #[error("Save error: {0}")] - SaveBinaryError(#[from] bincode::Error), + SaveBinaryError(#[from] bincode::error::EncodeError), + /// When error during loading + #[cfg(feature = "persistent")] + #[error("Load error: {0}")] + LoadBinaryError(#[from] bincode::error::DecodeError), /// When error during loading #[error("Load IO error")] LoadIoError(#[from] std::io::Error), diff --git a/crates/moe/src/lib.rs b/crates/moe/src/lib.rs index b4b09cd9..b8db6be1 100644 --- a/crates/moe/src/lib.rs +++ b/crates/moe/src/lib.rs @@ -93,11 +93,13 @@ mod surrogates; mod types; mod algorithm; +mod metrics; mod parameters; pub use clustering::*; pub use errors::*; pub use gaussian_mixture::*; +pub use metrics::*; pub use surrogates::*; pub use types::*; diff --git a/crates/moe/src/metrics.rs b/crates/moe/src/metrics.rs new file mode 100644 index 00000000..5f42a106 --- /dev/null +++ b/crates/moe/src/metrics.rs @@ -0,0 +1,111 @@ +use linfa::dataset::Dataset; +use linfa::prelude::Records; +use linfa::{ParamGuard, traits::Fit}; +use ndarray::{Array1, Array2}; + +use crate::GpSurrogate; + +/// A trait for 
cross validation score +pub trait GpScore +where + ER: std::error::Error + From, + P: Fit, Array1, ER, Object = O> + ParamGuard, + O: GpSurrogate, +{ + fn training_data(&self) -> &(Array2, Array1); + + fn params(&self) -> P; + + /// Compute quality metric Q2 with kfold cross validation + fn q2_score(&self, kfold: usize) -> f64 { + let (xt, yt) = self.training_data(); + let dataset = Dataset::new(xt.to_owned(), yt.to_owned()); + let yt_mean = yt.mean().unwrap(); + // Predictive Residual Sum of Squares + let mut press = 0.; + // Total Sum of Squares + let mut tss = 0.; + for (train, valid) in dataset.fold(kfold).into_iter() { + let params = self.params(); + let model: O = params + .fit(&train) + .expect("cross-validation: sub model fitted"); + let pred = model.predict(&valid.records().view()).unwrap(); + press += (valid.targets() - pred).mapv(|v| v * v).sum(); + tss += (valid.targets() - yt_mean).mapv(|v| v * v).sum(); + } + 1. - press / tss + } + + /// Q2 predictive coefficient with Leave-One-Out Cross-Validation + fn looq2_score(&self) -> f64 { + self.q2_score(self.training_data().0.nrows()) + } + + /// Predictive variance adequacy + fn pva_score(&self, kfold: usize) -> f64 { + let (xt, yt) = self.training_data(); + let dataset = Dataset::new(xt.to_owned(), yt.to_owned()); + // Sum of squared prediction errors scaled by the predicted variance + let mut varss = 0.; + // Number of validation samples + let mut n = 0usize; + for (train, valid) in dataset.fold(kfold).into_iter() { + let params = self.params(); + let model: O = params + .fit(&train) + .expect("cross-validation: sub model fitted"); + let pred = model.predict(&valid.records().view()).unwrap(); + let var = model.predict_var(&valid.records().view()).unwrap(); + varss += ((valid.targets() - &pred).mapv(|v| v * v) / var).sum(); + n += valid.nsamples(); + } + (varss / n as f64).ln().abs() + } + + /// Predictive variance adequacy with Leave-One-Out Cross-Validation + fn loopva_score(&self) -> f64 { + self.pva_score(self.training_data().0.nrows()) + } +} + +#[cfg(test)] 
+mod test { + use super::*; + use crate::GpMixtureParams; + use approx::assert_abs_diff_eq; + use egobox_doe::{Lhs, SamplingMethod}; + use ndarray::{Array1, array}; + use ndarray_rand::rand::SeedableRng; + use rand_xoshiro::Xoshiro256Plus; + + fn x_squared(x: &Array2) -> Array1 { + x.mapv(|v| v * v).sum_axis(ndarray::Axis(1)) + } + + #[test] + fn test_gpqa_griewank() { + let dims = [2]; + let nts = [20]; + let lim = array![[-10., 10.]]; + + (0..dims.len()).for_each(|i| { + let dim = dims[i]; + let nt = nts[i]; + let xlimits = lim.broadcast((dim, 2)).unwrap(); + + let rng = Xoshiro256Plus::seed_from_u64(42); + let xt = Lhs::new(&xlimits).with_rng(rng).sample(nt); + let yt = x_squared(&xt); + + let moe = GpMixtureParams::default() + .fit(&Dataset::new(xt, yt)) + .expect("GP fit error"); + + assert_abs_diff_eq!(moe.q2_score(10), 1., epsilon = 1e-3); + assert_abs_diff_eq!(moe.looq2_score(), 1., epsilon = 1e-3); + assert_abs_diff_eq!(moe.pva_score(10), 0., epsilon = 2e-1); + assert_abs_diff_eq!(moe.loopva_score(), 0., epsilon = 2e-1); + }); + } +} diff --git a/crates/moe/src/surrogates.rs b/crates/moe/src/surrogates.rs index 05c55f3b..47691548 100644 --- a/crates/moe/src/surrogates.rs +++ b/crates/moe/src/surrogates.rs @@ -43,7 +43,7 @@ pub trait SgpSurrogateParams: GpSurrogateParams { } /// A trait for a base GP surrogate -#[cfg_attr(feature = "serializable", typetag::serde(tag = "type"))] +#[cfg_attr(feature = "serializable", typetag::serde(tag = "type_gp"))] pub trait GpSurrogate: std::fmt::Display + Sync + Send { /// Returns input/output dims fn dims(&self) -> (usize, usize); @@ -62,7 +62,7 @@ pub trait GpSurrogate: std::fmt::Display + Sync + Send { } /// A trait for a GP surrogate with derivatives predictions and sampling -#[cfg_attr(feature = "serializable", typetag::serde(tag = "type"))] +#[cfg_attr(feature = "serializable", typetag::serde(tag = "type_gpext"))] pub trait GpSurrogateExt { /// Predict derivatives at n points and return (n, xdim) matrix /// where 
each column is the partial derivatives wrt the ith component @@ -75,7 +75,7 @@ pub trait GpSurrogateExt { } /// A trait for a GP surrogate. -#[cfg_attr(feature = "serializable", typetag::serde(tag = "type"))] +#[cfg_attr(feature = "serializable", typetag::serde(tag = "type_gpparam"))] pub trait GpParameterized { fn theta(&self) -> &Array1; fn variance(&self) -> f64; @@ -84,11 +84,11 @@ pub trait GpParameterized { } /// A trait for a GP surrogate. -#[cfg_attr(feature = "serializable", typetag::serde(tag = "type"))] +#[cfg_attr(feature = "serializable", typetag::serde(tag = "type_fullgp"))] pub trait FullGpSurrogate: GpParameterized + GpSurrogate + GpSurrogateExt {} /// A trait for a Sparse GP surrogate. -#[cfg_attr(feature = "serializable", typetag::serde(tag = "type"))] +#[cfg_attr(feature = "serializable", typetag::serde(tag = "type_sgp"))] pub trait SgpSurrogate: FullGpSurrogate {} /// A macro to declare GP surrogate using regression model and correlation model names. @@ -171,7 +171,7 @@ macro_rules! declare_surrogate { GpFileFormat::Json => serde_json::to_vec(self as &dyn GpSurrogate) .map_err(MoeError::SaveJsonError)?, GpFileFormat::Binary => { - bincode::serialize(self as &dyn GpSurrogate).map_err(MoeError::SaveBinaryError)? + bincode::serde::encode_to_vec(self as &dyn GpSurrogate, bincode::config::standard()).map_err(MoeError::SaveBinaryError)? } }; file.write_all(&bytes)?; @@ -333,7 +333,7 @@ macro_rules! declare_sgp_surrogate { GpFileFormat::Json => serde_json::to_vec(self as &dyn SgpSurrogate) .map_err(MoeError::SaveJsonError)?, GpFileFormat::Binary => { - bincode::serialize(self as &dyn SgpSurrogate).map_err(MoeError::SaveBinaryError)? + bincode::serde::encode_to_vec(self as &dyn SgpSurrogate, bincode::config::standard()).map_err(MoeError::SaveBinaryError)? 
} }; file.write_all(&bytes)?; @@ -409,8 +409,12 @@ pub fn load(path: &str, format: GpFileFormat) -> Result> { MoeError::LoadError(format!("Error while loading from {path}: ({err})")) }) } - GpFileFormat::Binary => bincode::deserialize(&data) - .map_err(|err| MoeError::LoadError(format!("Error while loading from {path} ({err})"))), + GpFileFormat::Binary => bincode::serde::decode_from_slice::, _>( + &data, + bincode::config::standard(), + ) + .map(|(surrogate, _)| surrogate) + .map_err(|err| MoeError::LoadError(format!("Error while loading from {path} ({err})"))), } } diff --git a/crates/moe/src/types.rs b/crates/moe/src/types.rs index cf84ff75..42770cf7 100644 --- a/crates/moe/src/types.rs +++ b/crates/moe/src/types.rs @@ -8,6 +8,7 @@ use egobox_gp::correlation_models::{ #[allow(unused_imports)] use egobox_gp::mean_models::{ConstantMean, LinearMean, QuadraticMean}; use linfa::Float; +use ndarray::{Array1, Array2}; use std::fmt::Display; #[cfg(feature = "serializable")] @@ -121,8 +122,20 @@ impl Clustering { } } +#[typetag::serde(tag = "type_gpqa")] +pub trait GpQualityAssurance { + fn training_data(&self) -> &(Array2, Array1); + fn q2(&self, kfold: usize) -> f64; + fn looq2(&self) -> f64; + fn pva(&self, kfold: usize) -> f64; + fn loopva(&self) -> f64; +} + /// A trait for Mixture of GP surrogates with derivatives using clustering -pub trait MixtureGpSurrogate: Clustered + GpSurrogate + GpSurrogateExt { +#[typetag::serde(tag = "type_mixture")] +pub trait MixtureGpSurrogate: + Clustered + GpSurrogate + GpSurrogateExt + GpQualityAssurance +{ fn experts(&self) -> &Vec>; } diff --git a/python/pyproject.toml b/python/pyproject.toml index 9eb5e298..9506c484 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -15,6 +15,7 @@ classifiers = [ requires-python = ">=3.9" dependencies = ["numpy"] dynamic = ["version"] +readme = 'README.md' [build-system] build-backend = "maturin" diff --git a/python/src/gp_mix.rs b/python/src/gp_mix.rs index 789e6601..3d1ae36b 
100644 --- a/python/src/gp_mix.rs +++ b/python/src/gp_mix.rs @@ -14,8 +14,7 @@ use std::{cmp::Ordering, path::Path}; use crate::gp_config::GpConfig; use crate::types::*; use egobox_ego::{EGO_GP_OPTIM_MAX_EVAL, EGO_GP_OPTIM_N_START}; -use egobox_gp::metrics::CrossValScore; -use egobox_moe::{Clustered, MixtureGpSurrogate, NbClusters, ThetaTuning}; +use egobox_moe::{Clustered, GpQualityAssurance, MixtureGpSurrogate, NbClusters, ThetaTuning}; #[allow(unused_imports)] // Avoid linting problem use egobox_moe::{GpMixture, GpSurrogate, GpSurrogateExt}; use linfa::{Dataset, traits::Fit};