diff --git a/.lintr b/.lintr index ee2ad764..44be3152 100644 --- a/.lintr +++ b/.lintr @@ -5,5 +5,6 @@ linters: linters_with_defaults( object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names cyclocomp_linter = NULL, # do not check function complexity commented_code_linter = NULL, # allow code in comments - line_length_linter = line_length_linter(120) + line_length_linter = line_length_linter(120), + indentation_linter = indentation_linter(indent = 2, hanging_indent_style = "never") ) diff --git a/DESCRIPTION b/DESCRIPTION index 7c362175..8d1f62e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Suggests: evaluate, mirai, paradox, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + utils Config/testthat/edition: 3 Config/testthat/parallel: true Encoding: UTF-8 @@ -44,6 +45,7 @@ Collate: 'named_list.R' 'Callback.R' 'Context.R' + 'Mlr3Component.R' 'as_factor.R' 'as_short_string.R' 'assert_ro_binding.R' diff --git a/NAMESPACE b/NAMESPACE index bff10f51..3f8d9671 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ S3method(distinct_values,logical) S3method(format,Mlr3Error) S3method(format,Mlr3Warning) S3method(hash_input,"function") +S3method(hash_input,R6) S3method(hash_input,data.table) S3method(hash_input,default) S3method(insert_named,"NULL") @@ -46,6 +47,7 @@ export("get_private<-") export(Callback) export(Context) export(Dictionary) +export(Mlr3Component) export(as_callback) export(as_callbacks) export(as_factor) @@ -128,6 +130,7 @@ export(map_int) export(map_lgl) export(map_values) export(messagef) +export(mlr3component_deprecation_msg) export(mlr_callbacks) export(modify_at) export(modify_if) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R new file mode 100644 index 00000000..a1464c3d --- /dev/null +++ b/R/Mlr3Component.R @@ -0,0 +1,466 @@ +#' @title Common Base Class for mlr3 Components +#' +#' @description +#' A pragmatic, lightweight base class that captures common patterns across the mlr3 ecosystem. +#' It standardizes various fields and provides shared methods for printing, help lookup, setting parameter values and fields, and hashing. +#' +#' Semantically, an mlr3 component is usually an object representing an algorithm or a method, such as a [mlr3::Learner] or a [mlr3::Measure]. +#' This algorithm can be configured though its parameters, accessible as the [paradox::ParamSet] in the `$param_set` field, as well as +#' through various other custom algorithm-specific fields. +#' All of these together can be changed via the `$configure()` method. +#' Some components, such as prominently the [mlr3::Learner], also have "state", such as the learned model. +#' +#' The identity of an object represented by an [`Mlr3Component`] is sometimes important, for example when aggregating benchmark results accross various settings of different algorithms used in tha benchmark, such as different [mlr3::Learner]s or different [mlr3::Measure]. +#' For this,the component provides a `$hash` field, which should identify the algorithm and its configuration, without including the state. +#' There is also the `$phash` field, which identifies the algorithm without its `$param_set` configuration -- this is used when aggregating benchmark results for individual algorithms when these algorithms were evaluated for different configuration parameter settings. +#' +#' [`Mlr3Component`]s should usually be constructed from a [Dictionary], which should be accessible via a short-form, such as [mlr3::lrn] or [mlr3pipelines::po]. +#' +#' @section Inheriting: +#' To create your own `Mlr3Component`, you need to overload the `$initialize()` function. +#' A concrete class should ideally provide all arguments of the `$initialize()` directly, i.e. the user should not need to provide `id` or `param_set`. +#' +#' The information contained in a concrete mlr3 component should usually be completely determined by four things: +#' +#' 1. The *construction arguments* given to the `$initialize()` method of the concrete class. +#' These can be [`Mlr3Component`]s themselves, or configuration options that do not naturally fit into the [paradox::ParamSet]. +#' These arguments should *not* overlap with the [paradox::ParamSet] parameters, and should not be the construction arguments of the +#' abstract [Mlr3Component] base class such as `id` or `packages`. +#' 2. The *configuration arguments* inside `$param_set$values`. +#' 3. Additional *configuration settings* that influence the behavior of the component, but are not part of the [paradox::ParamSet] because they do not naturally constitute a dimension that could be optimized. +#' 4. Some additional *state information*, storing the result of the algorithm, such as the learned model, often contained in a field called `$state`. +#' +#' Information from 1. should also be made accessible as active bindings, with the same name as the construction arguments. +#' +#' The information from 1., and 3. is contained in the `$phash` value. +#' For this, the `private$.additional_phash_input()` function needs to be overloaded by subclasses. +#' It is often sufficient for an abstract subclass to implement this, and concrete classes to inherit from this. E.g. the [mlr3::Learner] class implements `$private$.additional_phash_input()` to return the necessary iformation to be included in the `$phash` for almost all possible [mlr3::Learner]s. +#' Only concrete [mlr3::Learner]s that contain additional information not contained in one of the standard fields needs to overload the function again, such as e.g. [mlr3tuning::AutoTuner]. +#' It is best if this second overload only collects the additional information not contained in the abstract base class, and also calls `super$.additional_phash_input()`. +#' +#' The information from 1., 2., and 3. together is contained in the `$phash` value. +#' It is also collected automatically from the `private$.additional_phash_input()` function, as well as the `$param_set$values` field. +#' +#' The information from 3. should be made available through the `additional_configuration` construction argument of [`Mlr3Component`]. +#' +#' @section Cloning: +#' [`Mlr3Component`] implements a `private$deep_clone()` method that automatically clones R6 objects stored directly in the object, as well as in `$param_set$values`. +#' Because of the way the `$param_set` field is handled, subclasses that need to do additional cloning should overload this function, but always call `super$deep_clone(name, value)` for values they do not handle. +#' +#' +#' @export +Mlr3Component = R6Class("Mlr3Component", + public = list( + #' @description + #' Construct a new `Mlr3Component`. + #' @param dict_entry (`character(1)`) + #' The entry in the dictionary by which this component can be constructed. + #' @param dict_shortaccess (`character(1)`) + #' Name of the short access function for the dictionary that this component can be found under. + #' `get(dict_shortaccess, mode = "function")(dict_entry)` should create an object of the concrete class. + #' @param id (`character(1)`) + #' The ID of the constructed object. + #' ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in + #' combined [paradox::ParamSet]s. + #' If instances of a given abstract subclass should all not have IDs, this should be set to `NULL`. + #' Should default to the value of `dict_entry` in most cases, with few exceptions for wrapper objects (e.g. a + #' [mlr3pipelines::PipeOp] wrapping a [mlr3::Learner]). + #' @param param_set ([paradox::ParamSet] | `list` | `NULL`) + #' Parameter space description. This should be created by the subclass and given to `super$initialize()`. + #' If this is a [`ParamSet`][paradox::ParamSet], it is used for `$param_set` directly. + #' Otherwise it must be a `list` of expressions e.g. created by `alist()` that evaluate to + #' [`ParamSet`][paradox::ParamSet]s. + #' These [`ParamSet`][paradox::ParamSet] are combined using a + #' [`ParamSetCollection`][paradox::ParamSetCollection].\cr + #' If instances of a given abstract subclass should all not have a [paradox::ParamSet], this should be set to + #' `NULL`. + #' Otherwise, if a concrete subclass just happens to have an empty search space, the default [paradox::ps()] + #' should be used. + #' @param packages (`character()`) + #' The packages required by the constructed object. + #' The constructor will check whether these packages can be loaded and give a warning otherwise. + #' The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be + #' provided here. + #' Elements of `packages` are deduplicated and made accessible as the `$packages` field. + #' @param properties (`character()`) + #' A set of properties/capabilities the object has. + #' These often need to be a subset of an entry in [mlr3::mlr_reflections]. + #' However, the [`Mlr3Component`] constructor does not check this, it needs to be asserted by an abstract + #' inheriting class. + #' Elements are deduplicated and made accessible as the `$properties` field. + #' @param additional_configuration (`character()`) + #' Names of class fields that constitute additional configuration settings that influence the behavior of the + #' component, but are neither construction argument, nor part of the [paradox::ParamSet]. + #' An example is the `$predict_type` field of a [mlr3::Learner]. + #' @param representable (`logical(1)`) + #' Whether the object can be represented as a simple string. + #' Should generally be `TRUE` except for objects that are constructed with a large amount of data, such as + #' [mlr3::Task]s. + initialize = function(dict_entry, dict_shortaccess, id = dict_entry, + param_set = ps(), packages = character(0), properties = character(0), + additional_configuration = character(0), + representable = TRUE + ) { + private$.dict_entry = assert_string(dict_entry) + private$.dict_shortaccess = assert_string(dict_shortaccess) + private$.representable = assert_flag(representable) + private$.has_id = !is.null(id) + if (private$.has_id) { + self$id = id + } else { + # if ID is not provided, it is read-only set to the dictionary entry + private$.id = dict_entry + } + + if (!is.null(packages)) { + assert_character(packages, any.missing = FALSE, min.chars = 1L) + check_packages_installed(packages, + msg = sprintf("Package '%%s' required but not installed for %s '%s'", + class(self)[1L], self$id)) + env = self$.__enclos_env__ + while (!is.null(env)) { + newpkg = topenv(env)$.__NAMESPACE__.$spec[["name"]] + if (length(newpkg) == 1L) { + packages[[length(packages) + 1]] = newpkg + } + env = env$super$.__enclos_env__ + } + private$.packages = unique(packages[packages != "mlr3misc"]) + } + + private$.properties = unique(assert_character(properties, any.missing = FALSE, min.chars = 1L, null.ok = TRUE)) + + # ParamSet is optional to keep this base class generic across components + if (is.null(param_set)) { + private$.param_set = NULL + private$.param_set_source = NULL + } else if (inherits(param_set, "ParamSet")) { + private$.param_set = paradox::assert_param_set(param_set) + private$.param_set_source = NULL + } else { + lapply(param_set, function(x) paradox::assert_param_set(eval(x))) + private$.param_set_source = param_set + } + + assert_character(additional_configuration, any.missing = FALSE, min.chars = 1L, unique = TRUE) + assert_subset(additional_configuration, names(self)) + assert_disjunct(additional_configuration, c( + # additional configuration can not be: + # (1) a parameter, (2) a construction argument (these are captured automatically), (3) a standard field + if (!is.null(self$param_set)) self$param_set$ids(), + names(formals(self$initialize)), + "id", "label", "param_set", "packages", "properties", "format", "print", "help", "configure", "override_info", + "man", "hash", "phash" + )) + private$.additional_configuration = additional_configuration + }, + + #' @description + #' Helper for print outputs. + #' @param ... (ignored). + format = function(...) { + if (private$.has_id) { + sprintf("<%s:%s>", class(self)[1L], self$id) + } else { + sprintf("<%s>", class(self)[1L]) + } + }, + + #' @description + #' Printer. + #' @param ... (ignored). + print = function(...) { + msg_h = if (is.null(self$label) || is.na(self$label)) "" else paste0(": ", self$label) + cat_cli({ + cli::cli_h1("{.cls {class(self)[1L]}} ({self$id}){msg_h}") + cli::cli_li("Parameters: {as_short_string(if (is.null(private$.param_set)) list() else private$.param_set$values, 1000L)}") # nolint + cli::cli_li("Packages: {.pkg {if (length(self$packages)) self$packages else '-'}}") + }) + }, + + #' @description + #' Opens the corresponding help page referenced by field `$man`. + help = function() { + open_help(self$man) + }, + + #' @description + #' Set parameter values and fields in one step. + #' Named arguments overlapping with the [`ParamSet`][paradox::ParamSet] are set as parameters; + #' remaining arguments are assumed to be regular fields of the object. + #' @param ... (named `any`) + #' @param .values (named `list()`) + configure = function(..., .values = list()) { + dots = list(...) + assert_list(dots, names = "unique") + assert_list(.values, names = "unique") + assert_disjunct(names(dots), names(.values)) + new_values = insert_named(dots, .values) + + # set params in ParamSet if available + if (!is.null(private$.param_set) && length(new_values)) { + param_ids = private$.param_set$ids() + ii = names(new_values) %in% param_ids + if (any(ii)) { + private$.param_set$values = insert_named(private$.param_set$values, new_values[ii]) + new_values = new_values[!ii] + } + } else { + param_ids = character() + } + + # remaining args become fields + if (length(new_values)) { + ndots = names(new_values) + for (i in seq_along(new_values)) { + nn = ndots[[i]] + if (!exists(nn, envir = self, inherits = FALSE)) { + stopf("Cannot set argument '%s' for '%s' (not a parameter, not a field).%s", + nn, class(self)[1L], did_you_mean(nn, setdiff(names(self), ".__enclos_env__"))) # nolint + } + self[[nn]] = new_values[[i]] + } + } + + invisible(self) + }, + + #' @description + #' Override the `man` and `hash` fields. + #' @param man (`character(1)` | `NULL`) + #' The manual page of the component. + #' @param hash (`character(1)` | `NULL`) + #' The hash of the component. + override_info = function(man = NULL, hash = NULL) { + if (!is.null(man)) { + private$.man = man + private$.label = NULL + } + if (!is.null(hash)) { + private$.hashmap = NULL + orig_phash = self$phash + orig_hash = self$hash + private$.hashmap = structure(c(hash, hash), names = c(orig_hash, orig_phash)) + } + invisible(self) + } + ), + + active = list( + #' @field id (`character(1)`) + #' Identifier of the object. + #' Used in tables, plot and text output. + id = function(rhs) { + if (!missing(rhs)) { + if (!private$.has_id) { + stop("id is read-only") + } + private$.id = assert_string(rhs, min.chars = 1L) + } + private$.id + }, + + #' @field label (`character(1)`) + #' Human-friendly label. + #' Can be used in tables, plot and text output instead of the ID. + #' Auto-generated from the title of the help page. + label = function(rhs) { + if (!missing(rhs)) stop("label is read-only") + if (is.null(private$.label)) { + helpinfo = self$help() + helpcontent = NULL + if (inherits(helpinfo, "help_files_with_topic") && length(helpinfo)) { + ghf = get(".getHelpFile", mode = "function", envir = getNamespace("utils")) + helpcontent = ghf(helpinfo) + } else if (inherits(helpinfo, "dev_topic")) { + helpcontent = tools::parse_Rd(helpinfo$path) + } + if (is.null(helpcontent)) { + private$.label = "LABEL COULD NOT BE RETRIEVED" + } else { + private$.label = Filter(function(x) identical(attr(x, "Rd_tag"), "\\title"), helpcontent)[[1]][[1]][1] + } + } + private$.label + }, + + #' @field packages (`character()`) + #' Set of required packages. + #' These packages are loaded, but not attached. + #' Absence of these packages will generate a warning during construction. + packages = function(rhs) { + if (!missing(rhs)) stop("packages is read-only") + private$.packages + }, + + #' @field properties (`character()`)\cr + #' Stores a set of properties/capabilities the object has. + #' These are set during construction and should not be changed afterwards. + #' They may be "optimistic" in the sense that the true capabilities could depend on specific configuration + #' parameter settings; + #' `$properties` then indicate the capabilities under favorable configuration settings. + properties = function(rhs) { + if (!missing(rhs)) { + mlr3component_deprecation_msg("writing to properties is deprecated. Write to private$.properties if this is necessary for tests.") # nolint + # stop("properties is read-only") + private$.properties = rhs + } + private$.properties + }, + + #' @field param_set ([paradox::ParamSet] | `NULL`) + #' Set of hyperparameters. + param_set = function(val) { + if (is.null(private$.param_set) && !is.null(private$.param_set_source)) { + sourcelist = lapply(private$.param_set_source, function(x) eval(x)) + if (length(sourcelist) > 1) { + private$.param_set = paradox::ParamSetCollection$new(sourcelist) + } else { + private$.param_set = sourcelist[[1]] + } + } + if (!missing(val) && !identical(val, private$.param_set)) { + stop("param_set is read-only.") + } + private$.param_set + }, + + #' @field man (`character(1)`) + #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. + #' Inferred automatically from the class name and package in which the class is defined. + #' If a concrete class is not defined in a package, the help page of its first parent class with a help page is + #' used. + #' Can be overridden with the `$override_info()` method. + man = function(rhs) { + if (!missing(rhs)) { + mlr3component_deprecation_msg("writing to man is deprecated") + private$.man = rhs + # stop("man is read-only") + } + if (is.null(private$.man)) { + iter = 1 + env = self + while (!is.null(env)) { + pkgstring = topenv(env$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]] + classstring = class(self)[[iter]] + man = paste0(pkgstring, "::", classstring) + help_works = tryCatch({ + length(as.character(open_help(man))) > 0L || + NROW(utils::help.search(sprintf("^%s$", classstring), package = pkgstring, + ignore.case = FALSE, agrep = FALSE, fields = "alias")$matches) > 0L + }, error = function(e) FALSE) + if (help_works) { + private$.man = man + break + } + iter = iter + 1 + env = env$.__enclos_env__$super + } + if (is.null(private$.man)) { + private$.man = "mlr3misc::Mlr3Component" + } + } + private$.man + }, + + #' @field hash (`character(1)`) + #' Stable hash that includes id, parameter values (if present) and additional configuration settings (from + #' construction or class fields) but not state. + #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must + #' therefore be implemented by subclasses. + hash = function(rhs) { + if (!missing(rhs)) stop("hash is read-only") + hash = calculate_hash(class(self), self$id, .list = c(self$param_set$values, private$.additional_phash_input())) + if (hash %in% names(private$.hashmap)) { + private$.hashmap[[hash]] + } else { + hash + } + }, + + #' @field phash (`character(1)`) + #' Hash that includes id and additional configuration settings (from construction or class fields) but not + #' parameter values and no state. + #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must + #' therefore be implemented by subclasses. + phash = function(rhs) { + if (!missing(rhs)) stop("phash is read-only") + hash = calculate_hash( + class(self), self$id, + list(private$.additional_phash_input()) + ) + if (hash %in% names(private$.hashmap)) { + private$.hashmap[[hash]] + } else { + hash + } + } + ), + + private = list( + .dict_entry = NULL, + .dict_shortaccess = NULL, + .representable = NULL, + .has_id = NULL, + .id = NULL, + .param_set = NULL, + .packages = NULL, + .properties = NULL, + .param_set_source = NULL, + .label = NULL, + .man = NULL, + .hashmap = NULL, + .additional_configuration = NULL, + + .additional_phash_input = function() { + if (is.null(self$initialize)) return(NULL) + sc = sys.calls() + # if we are called through `super$.additional_phash_input()` we are not complaining + if (length(sc) > 1 && identical(sc[[length(sc) - 1]][[1]], quote(super$.additional_phash_input))) return(NULL) + initformals <- names(formals(args(self$initialize))) + if (!test_subset(initformals, c("id", "param_vals"))) { + # nolint start + stopf("Class %s has construction arguments besides 'id' and 'param_vals' but does not overload the private '.additional_phash_input()' function. + +The hash and phash of a class must differ when it represents a different operation; since %s has construction arguments that could change the operation that is performed by it, it is necessary for the $hash and $phash to reflect this. `.additional_phash_input()` should return all the information (e.g. hashes of encapsulated items) that should additionally be hashed; read the help of ?Mlr3Component for more information.", + # nolint end + class(self)[[1]], class(self)[[1]]) + } + }, + + deep_clone = function(name, value) { + if (!is.null(private$.param_set_source)) { + private$.param_set = NULL # required to keep clone identical to original, otherwise tests get really ugly + if (name == ".param_set_source") { + value = lapply(value, function(x) { + if (inherits(x, "R6")) x$clone(deep = TRUE) else x + }) + } + } + if (is.environment(value) && ".__enclos_env__" %in% names(value) && "clone" %in% names(value)) { + return(value$clone(deep = TRUE)) + } + value + } + ) +) + +#' @title Deprecation Message related to the `Mlr3Component` Class +#' +#' @description +#' Will give different messages depending on deprecation progression and will be more agressive in tests than +#' interactively. +#' +#' @keywords internal +#' @param msg (`character(1)`) +#' Message to print. +#' @export +mlr3component_deprecation_msg = function(msg) { + if (!identical(getOption("mlr3.on_deprecated_mlr3component", "ignore"), "ignore")) { + if (identical(getOption("mlr3.on_deprecated_mlr3component"), "warn")) { + warning(msg) + } else { + stop(msg) + } + } +} diff --git a/R/calculate_hash.R b/R/calculate_hash.R index bb5c9887..eb6a3bd2 100644 --- a/R/calculate_hash.R +++ b/R/calculate_hash.R @@ -9,13 +9,15 @@ #' #' @param ... (`any`)\cr #' Objects to hash. +#' @param .list (`list()`)\cr +#' Additional objects to hash. #' #' @return (`character(1)`). #' @export #' @examples #' calculate_hash(iris, 1, "a") -calculate_hash = function(...) { - digest(lapply(list(...), hash_input), algo = "xxhash64") +calculate_hash = function(..., .list = list()) { + digest(lapply(c(list(...), .list), hash_input), algo = "xxhash64") } #' Hash Input @@ -54,3 +56,12 @@ hash_input.default = function(x) { x } +#' @describeIn hash_input +#' If the R6 object has a `$hash` slot, it is returned. +#' Otherwise, the object is returned as is. +#' @export +hash_input.R6 = function(x) { + # In the following we also avoid accessing `val$hash` twice, because it could + # potentially be an expensive AB. + get0("hash", x, mode = "any", inherits = FALSE, ifnotfound = x) +} diff --git a/R/topo_sort.R b/R/topo_sort.R index b673809d..b3d5c09d 100644 --- a/R/topo_sort.R +++ b/R/topo_sort.R @@ -50,7 +50,7 @@ topo_sort = function(nodes) { } j = (j %% n) + 1L # inc j, but wrap around end if (j == 1L) { # we wrapped, lets remove nodes of current layer from deps - layer = nodes[.(depth_count), id, on = "depth", nomatch = NULL] + layer = nodes[list(depth_count), id, on = "depth", nomatch = NULL] if (length(layer) == 0L) { stop("Cycle detected, this is not a DAG!") } diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R new file mode 100644 index 00000000..7fb3e7cb --- /dev/null +++ b/inst/testthat/helper_test_mlr3component.R @@ -0,0 +1,428 @@ +#' @title Mlr3Component Autotest Suite +#' +#' @description +#' Autotests for [`Mlr3Component`] subclasses. +#' +#' @details +#' Run `expect_mlr3component_subclass()` that verify various assumptions that subclasses of [`Mlr3Component`] should +#' fulfill. +#' +#' @param compclass (`character(1)`) +#' The class of the component to test. +#' @param constargs (`list`) +#' A list of construction arguments to pass to the component. +#' @param expect_congruent_man (`logical(1)`) +#' Whether to expect the `man` field to be of the form `package::_`. +#' @param check_package_export (`logical(1)`) +#' Whether to check that the component is exported from the package. +#' @param dict_package (`character(1)` | `environment`) +#' The package that contains the dictionary. +#' If `NULL`, the package of the component is used. +#' An `environment` can also be passed, which will then be used directly. +#' +#' @export +expect_mlr3component_subclass = function(compclass, constargs, check_congruent_man = TRUE, check_package_export = TRUE, + dict_package = NULL +) { + + checkmate::assert_list(constargs, names = "named") + checkmate::assert_flag(check_congruent_man) + checkmate::assert_flag(check_package_export) + checkmate::assert( + checkmate::check_string(dict_package, null.ok = TRUE), + checkmate::check_environment(dict_package, null.ok = TRUE) + ) + + old_options = options(mlr3.on_deprecated_mlr3component = "error") + on.exit(options(old_options)) + + checkmate::expect_class(compclass, "R6ClassGenerator") + + object = do.call(compclass$new, constargs) + + checkmate::expect_r6(object, "Mlr3Component", cloneable = TRUE, + public = c("id", "label", "param_set", "packages", "properties", "hash", "phash", + "format", "print", "help", "configure", "override_info", "man", "initialize" + ), + private = c(".additional_phash_input", "deep_clone", ".additional_configuration", ".dict_entry", ".representable") + ) + + + checkmate::expect_string(object$id, min.chars = 1L) + checkmate::expect_string(object$label, min.chars = 1L) + checkmate::expect_class(object$param_set, "ParamSet", null.ok = TRUE) + checkmate::expect_character(object$packages, any.missing = FALSE, min.chars = 1L) + checkmate::expect_character(object$properties, any.missing = FALSE, min.chars = 1L) + checkmate::expect_string(object$hash, min.chars = 1L) + checkmate::expect_string(object$phash, min.chars = 1L) + checkmate::expect_string(object$man, pattern = "^[^:]+::[^:]+$") + + checkmate::expect_flag(mlr3misc::get_private(object)$.has_id) + checkmate::expect_string(mlr3misc::get_private(object)$.dict_entry, min.chars = 1L) + checkmate::expect_string(mlr3misc::get_private(object)$.dict_shortaccess, min.chars = 1L) + checkmate::expect_character(mlr3misc::get_private(object)$.additional_configuration, + any.missing = FALSE, min.chars = 1L, unique = TRUE) + checkmate::expect_flag(mlr3misc::get_private(object)$.representable) + + dict_entry = mlr3misc::get_private(object)$.dict_entry + dict_shortaccess = mlr3misc::get_private(object)$.dict_shortaccess + additional_configuration = mlr3misc::get_private(object)$.additional_configuration + construction_arguments = as.character(names(formals(object$initialize))) + + all_fields_list = list() + recurse_class = compclass + while (!is.null(recurse_class)) { + all_fields_list[[length(all_fields_list) + 1]] = c(names(recurse_class$public_fields), names(recurse_class$active)) + recurse_class = recurse_class$get_inherit() + } + all_fields = unique(unlist(all_fields_list)) + # the following fields are part of the base class and can therefore not be part of the additional configuration + all_fields = setdiff(all_fields, c("man", "properties", "packages", "hash", "phash", "id", "label", "param_set")) + checkmate::expect_subset(additional_configuration, all_fields, + info = "additional_configuration is a subset of all valid fields") + checkmate::expect_subset(construction_arguments, all_fields, + info = "construction arguments are accessible as fields") + checkmate::expect_disjunct(construction_arguments, additional_configuration, + info = "construction arguments and additional_configuration should be disjoint") + if (!is.null(object$param_set)) { + checkmate::expect_disjunct(construction_arguments, object$param_set$ids(), + info = "construction arguments and param_set IDs should be disjoint") + checkmate::expect_disjunct(additional_configuration, object$param_set$ids(), + info = "additional_configuration and param_set IDs should be disjoint") + } + + testthat::expect_output(print(object), object$id, fixed = TRUE, info = "print output contains id") + + checkmate::expect_string(object$format(), pattern = "^<[^>]+>$") + + testthat::expect_true(isTRUE(mlr3misc::get_private(object)$.has_id) || identical(dict_entry, object$id)) + + oldhash = object$hash + oldphash = object$phash + + if (isTRUE(mlr3misc::get_private(object)$.has_id)) { + oldid = object$id + object$id = "newid" + testthat::expect_equal(object$id, "newid", info = "id can be set") + testthat::expect_true(object$hash != oldhash, info = "hash changes with id") + testthat::expect_true(object$phash != oldphash, info = "phash changes with id") + object$id = oldid + testthat::expect_equal(object$id, oldid, info = "id can be reset") + testthat::expect_equal(object$hash, oldhash, info = "hash is reset by id reset") + testthat::expect_equal(object$phash, oldphash, info = "phash is reset by id reset") + object$configure(id = "newid2") + testthat::expect_equal(object$id, "newid2", info = "id can be set via configure") + object$configure(id = oldid) + testthat::expect_equal(object$id, oldid, info = "id can be reset via configure") + + } + + object2 = do.call(compclass$new, constargs) + object_clone = object$clone(deep = TRUE) + + expect_deep_clone(object, object2) + expect_deep_clone(object, object_clone) + + eligibleparams = NULL + + + + if (!is.null(object$param_set) || length(object$param_set$ids()) > 0L) { + # we now check if configuration parameters can be changed directly and through configure and whether that affects + # hash but not phash. We do this by automatically generating a parameter value that deviates from the automatically + # constructed one. However, for ParamUty we can't do that, so if there are only 'ParamUty' parameters we skip this + # part. + tops = object$param_set + eligibleparams = which( + tops$class != "ParamUty" & + # filter out discrete params with only one level, or the numeric parameters with $lower == $upper + # Note that numeric parameters have 0 levels, and discrete parameters have $lower == $upper (== NA) + ( + (!is.na(tops$lower) & tops$lower != tops$upper) | + (is.finite(tops$nlevels) & tops$nlevels > 1) + ) & + !mlr3misc::map_lgl(tops$values[tops$ids()], is.null) + ) + } + if (length(eligibleparams)) { + testingparam = tops$ids()[[eligibleparams[[1]]]] + origval = object$param_set$values[[testingparam]] + if (!is.atomic(origval)) origval = NULL + if (tops$class[[testingparam]] %in% c("ParamLgl", "ParamFct")) { + candidates = tops$levels[[testingparam]] + } else { + candidates = Filter(function(x) is.finite(x) && !is.na(x), + c(tops$lower[[testingparam]], tops$upper[[testingparam]], tops$lower[[testingparam]] + 1, 0, origval + 1)) + } + val = setdiff(candidates, origval)[1] + parvals = list(val) + names(parvals) = testingparam + + parvals_orig = list(origval) + names(parvals_orig) = testingparam + + object$param_set$values[[testingparam]] = val + testthat::expect_equal(object$param_set$values[[testingparam]], val, info = "parameters can be set directly") + changedhash = object$hash + testthat::expect_false(object$hash == oldhash, info = "hash changes with parameter set") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set") + testthat::expect_equal(object_clone$param_set$values[[testingparam]], origval, + info = "params of cloned objects are distinct") + object_changed_params = object$clone(deep = TRUE) + + object$param_set$values[[testingparam]] = origval + testthat::expect_equal(object$param_set$values[[testingparam]], origval, info = "parameters can be reset directly") + testthat::expect_equal(object$hash, oldhash, info = "hash is changed back by resetting parameter") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set reset") + + object$configure(.values = parvals) + testthat::expect_equal(object$param_set$values[[testingparam]], val, + info = "configure can set parameters via .values") + testthat::expect_equal(object$hash, changedhash, info = "hash changes with parameter set through configure") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set through configure") + testthat::expect_equal(object_clone$param_set$values[[testingparam]], origval, + info = "params of cloned objects are distinct") + object$configure(.values = parvals_orig) + testthat::expect_equal(object$param_set$values[[testingparam]], origval, + info = "configure can reset parameters via .values") + testthat::expect_equal(object$hash, oldhash, + info = "hash is unaffected by parameter set reset through configure") + testthat::expect_equal(object$phash, oldphash, + info = "phash is unaffected by parameter set reset through configure") + + + do.call(object$configure, parvals) + testthat::expect_equal(object$param_set$values[[testingparam]], val, info = "configure can set parameters via ...") + testthat::expect_equal(object$hash, changedhash, info = "hash changes with parameter set through configure") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set through configure") + testthat::expect_equal(object_clone$param_set$values[[testingparam]], origval, + info = "params of cloned objects are distinct") + do.call(object$configure, parvals_orig) + testthat::expect_equal(object$param_set$values[[testingparam]], origval, + info = "configure can reset parameters via ...") + testthat::expect_equal(object$hash, oldhash, + info = "hash is unaffected by parameter set reset through configure") + testthat::expect_equal(object$phash, oldphash, + info = "phash is unaffected by parameter set reset through configure") + + } + + if (is.null(dict_package)) { + dict_environment = topenv(object$.__enclos_env__) + } else if (is.environment(dict_package)) { + dict_environment = dict_package + } else { + dict_environment = asNamespace(dict_package) + } + + dict_constructor = get(dict_shortaccess, mode = "function", envir = dict_environment) + # dict_constructor is something like 'lrn', 'po', 'flt' etc. + checkmate::expect_function(dict_constructor) + + dictionary = dict_constructor() + checkmate::expect_r6(dictionary, "Dictionary") # expect an mlr3misc::Dictionary here + name_of_dictionary = Filter(function(x) identical(dict_environment[[x]], dictionary), names(dict_environment))[1] + checkmate::expect_string(name_of_dictionary, min.chars = 1L, info = "name of dictionary in dict_environment") + + + dict_object = do.call(function(...) dict_constructor(dict_entry, ...), constargs) + testthat::expect_equal(dict_object, object, info = "object from dictionary is congruent with object") + if (length(eligibleparams)) { + dict_object2 = do.call(dict_constructor, c(list(dict_entry), mlr3misc::insert_named(constargs, parvals))) + testthat::expect_equal(dict_object2, object_changed_params, + info = "object from dictionary constructed with changed parameters") + + dict_object3 = do.call(dict_constructor, c(list(dict_entry), mlr3misc::insert_named(constargs, parvals_orig))) + testthat::expect_equal(dict_object3, object, info = "object from dictionary constructed with original parameter") + } + + if (check_congruent_man || check_package_export) { + expected_package = topenv(object$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]] + } + + if (check_congruent_man) { + checkmate::expect_string(object$man, pattern = sprintf("^%s::[^:]+$", expected_package)) + help_info = strsplit(object$man, "::")[[1]] + help_topic = utils::help.search(sprintf("^%s$", help_info[[2]]), package = help_info[[1]], ignore.case = FALSE, + agrep = FALSE, fields = "alias")$matches$Topic + testthat::expect_true(length(help_topic) > 0L, info = "help page exists") + testthat::expect_equal(help_topic, + sprintf("%s_%s", name_of_dictionary, dict_entry), + info = "help page name is congruent with dict_name and dict_entry") + } + + if (check_package_export) { + nspath = dirname(system.file("NAMESPACE", package = expected_package)) + exports = parseNamespaceFile(basename(nspath), dirname(nspath))$exports + testthat::expect_true(class(object)[[1]] %in% exports, info = "class is exported") + } + + construction_conf_objects = mget(construction_arguments, envir = object) + additional_conf_objects = mget(additional_configuration, envir = object) + + # constructing and configuring with original configuration values should not have an effect + + object_explicit_construction = do.call(compclass$new, construction_conf_objects) + testthat::expect_equal(object_explicit_construction, object, + info = "construction with arguments retrieved from formals of initialize is equivalent to construction without" + ) + + object3 = do.call(compclass$new, constargs) + + object3$configure(.values = additional_conf_objects) + testthat::expect_equal(object3, object, info = "configure with additional configuration does not change object") + + dict_object4 = do.call(dict_constructor, c(list(dict_entry), construction_conf_objects, additional_conf_objects)) + testthat::expect_equal(dict_object4, object, + info = "object can be constructed from initialize-formals and additional_configuration") + + if (!is.null(object$param_set)) { + dict_object5 = do.call(dict_constructor, + c(list(dict_entry), construction_conf_objects, additional_conf_objects, object$param_set$values)) + testthat::expect_equal(dict_object5, object, + info = "object can be constructed from initialize-formals, additional_configuration, and parameter set") + } +} + +#' @title Test many Mlr3Component subclasses +#' +#' @description +#' For a given list of Mlr3Component subclasses, run [expect_mlr3component_subclass()] for each of them. +#' +#' This function calls [testthat::test_that()], so it should *not* be run inside a `test_that()`-block. +#' Instead, it should be run at top level in a test file directly. +#' +#' @param compclasses (`list` of `R6ClassGenerator`) +#' The list of Mlr3Component subclasses to test. +#' It is a good idea to auto-generate this, see examples. +#' @param dict_constargs (`list`) +#' A list of lists of construction arguments to pass to the dictionary. +#' This list should be named by the class of the component for which they apply, but does not need to be exhaustive. +#' Components not mentioned in the list get empty construction arguments by default. +#' @param check_congruent_man (`logical(1)`) +#' Whether to check that the `man` field is congruent with the dictionary name and entry. +#' @param check_package_export (`logical(1)`) +#' Whether to check that the component is exported from the package. +#' @param dict_package (`character(1)` | `environment`) +#' The package that contains the dictionary. +#' If `NULL`, the package of the component is used. +#' An `environment` can also be passed, which will then be used directly. +#' +#' @examples +#' abstracts = c("PipeOp", "PipeOpEnsemble") +#' nspath = dirname(system.file("NAMESPACE", package = "mlr3pipelines")) +#' exports = parseNamespaceFile(basename(nspath), dirname(nspath))$exports +#' compclass_names = setdiff(grep(exports, pattern = "^PipeOp", value = TRUE), abstract_classes) +#' compclasses = lapply(compclass_names, get, envir = asNamespace("mlr3pipelines")) +#' dict_constargs = list( +#' PipeOpImputeLearner = list(learner = mlr_learners$get("classif.rpart")), +#' PipeOpLearner = list(learner = mlr_learners$get("classif.rpart")), +#' PipeOpLearnerCV = list(learner = mlr_learners$get("classif.rpart")), +#' ) +#' test_that_mlr3component_dict(compclasses, dict_constargs, dict_package = "mlr3pipelines") +#' @export + +test_that_mlr3component_dict = function(compclasses, dict_constargs, check_congruent_man = TRUE, + check_package_export = TRUE, dict_package = NULL +) { + for (compclass in compclasses) { + clname = compclass$classname + if (is.null(clname)) { + clname = "ERROR" # we don't want top level code to fail. + } + testthat::test_that(sprintf("Mlr3Component subclass %s", clname), { + expect_mlr3component_subclass( + compclass = compclass, + constargs = c(list(), dict_constargs[[clname]]), + check_congruent_man = check_congruent_man, + check_package_export = check_package_export, + dict_package = dict_package + ) + }) + } +} + + +#' @title Expect that 'one' is a deep clone of 'two' +#' +#' @description +#' Expect that 'one' is a deep clone of 'two'. +#' +#' @param one (`any`) +#' The first object to compare. +#' @param two (`any`) +#' The second object to compare. +#' +#' @export +expect_deep_clone = function(one, two) { + # is equal + testthat::expect_equal(one, two) + visited = new.env() # nolint + visited_b = new.env() # nolint + expect_references_differ = function(a, b, path) { + + force(path) + if (length(path) > 400) { + stop("Recursion too deep in expect_deep_clone()") + } + + # don't go in circles + addr_a = data.table::address(a) + addr_b = data.table::address(b) + if (!is.null(visited[[addr_a]])) { + return(invisible(NULL)) + } + visited[[addr_a]] = path + visited_b[[addr_b]] = path + + # follow attributes, even for non-recursive objects + if (utils::tail(path, 1) != "[attributes]" && !is.null(base::attributes(a))) { + Recall(base::attributes(a), base::attributes(b), c(path, "[attributes]")) + } + + # don't recurse if there is nowhere to go + if (!base::is.recursive(a)) { + return(invisible(NULL)) + } + + # check that environments differ + if (base::is.environment(a)) { + # some special environments + if (identical(a, baseenv()) || identical(a, globalenv()) || identical(a, emptyenv())) { + return(invisible(NULL)) + } + if (length(path) > 1 && R6::is.R6(a) && !"clone" %in% names(a)) { + return(invisible(NULL)) # don't check if smth is not cloneable + } + label = sprintf("Object addresses differ at path %s", paste0(path, collapse = "->")) + testthat::expect_true(addr_a != addr_b, label = label) + testthat::expect_null(visited_b[[addr_a]], label = label) + } else { + a <- unclass(a) + b <- unclass(b) + } + + # recurse + if (base::is.function(a)) { + return(invisible(NULL)) + ## # maybe this is overdoing it + ## Recall(base::formals(a), base::formals(b), c(path, "[function args]")) + ## Recall(base::body(a), base::body(b), c(path, "[function body]")) + } + objnames = base::names(a) + if (is.null(objnames) || anyDuplicated(objnames)) { + index = seq_len(base::length(a)) + } else { + index = objnames + if (base::is.environment(a)) { + index = Filter(function(x) !bindingIsActive(x, a), index) + } + } + for (i in index) { + if (utils::tail(path, 1) == "[attributes]" && i %in% c("srcref", "srcfile", ".Environment")) next + Recall(base::`[[`(a, i), base::`[[`(b, i), c(path, sprintf("[element %s]%s", i, + if (!is.null(objnames)) sprintf(" '%s'", if (is.character(index)) i else objnames[[i]]) else ""))) + } + } + expect_references_differ(one, two, "ROOT") +} diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd new file mode 100644 index 00000000..54274423 --- /dev/null +++ b/man/Mlr3Component.Rd @@ -0,0 +1,296 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Mlr3Component.R +\name{Mlr3Component} +\alias{Mlr3Component} +\title{Common Base Class for mlr3 Components} +\description{ +A pragmatic, lightweight base class that captures common patterns across the mlr3 ecosystem. +It standardizes various fields and provides shared methods for printing, help lookup, setting parameter values and fields, and hashing. + +Semantically, an mlr3 component is usually an object representing an algorithm or a method, such as a \link[mlr3:Learner]{mlr3::Learner} or a \link[mlr3:Measure]{mlr3::Measure}. +This algorithm can be configured though its parameters, accessible as the \link[paradox:ParamSet]{paradox::ParamSet} in the \verb{$param_set} field, as well as +through various other custom algorithm-specific fields. +All of these together can be changed via the \verb{$configure()} method. +Some components, such as prominently the \link[mlr3:Learner]{mlr3::Learner}, also have "state", such as the learned model. + +The identity of an object represented by an \code{\link{Mlr3Component}} is sometimes important, for example when aggregating benchmark results accross various settings of different algorithms used in tha benchmark, such as different \link[mlr3:Learner]{mlr3::Learner}s or different \link[mlr3:Measure]{mlr3::Measure}. +For this,the component provides a \verb{$hash} field, which should identify the algorithm and its configuration, without including the state. +There is also the \verb{$phash} field, which identifies the algorithm without its \verb{$param_set} configuration -- this is used when aggregating benchmark results for individual algorithms when these algorithms were evaluated for different configuration parameter settings. + +\code{\link{Mlr3Component}}s should usually be constructed from a \link{Dictionary}, which should be accessible via a short-form, such as \link[mlr3:mlr_sugar]{mlr3::lrn} or \link[mlr3pipelines:po]{mlr3pipelines::po}. +} +\section{Inheriting}{ + +To create your own \code{Mlr3Component}, you need to overload the \verb{$initialize()} function. +A concrete class should ideally provide all arguments of the \verb{$initialize()} directly, i.e. the user should not need to provide \code{id} or \code{param_set}. + +The information contained in a concrete mlr3 component should usually be completely determined by four things: +\enumerate{ +\item The \emph{construction arguments} given to the \verb{$initialize()} method of the concrete class. +These can be \code{\link{Mlr3Component}}s themselves, or configuration options that do not naturally fit into the \link[paradox:ParamSet]{paradox::ParamSet}. +These arguments should \emph{not} overlap with the \link[paradox:ParamSet]{paradox::ParamSet} parameters, and should not be the construction arguments of the +abstract \link{Mlr3Component} base class such as \code{id} or \code{packages}. +\item The \emph{configuration arguments} inside \verb{$param_set$values}. +\item Additional \emph{configuration settings} that influence the behavior of the component, but are not part of the \link[paradox:ParamSet]{paradox::ParamSet} because they do not naturally constitute a dimension that could be optimized. +\item Some additional \emph{state information}, storing the result of the algorithm, such as the learned model, often contained in a field called \verb{$state}. +} + +Information from 1. should also be made accessible as active bindings, with the same name as the construction arguments. + +The information from 1., and 3. is contained in the \verb{$phash} value. +For this, the \code{private$.additional_phash_input()} function needs to be overloaded by subclasses. +It is often sufficient for an abstract subclass to implement this, and concrete classes to inherit from this. E.g. the \link[mlr3:Learner]{mlr3::Learner} class implements \verb{$private$.additional_phash_input()} to return the necessary iformation to be included in the \verb{$phash} for almost all possible \link[mlr3:Learner]{mlr3::Learner}s. +Only concrete \link[mlr3:Learner]{mlr3::Learner}s that contain additional information not contained in one of the standard fields needs to overload the function again, such as e.g. \link[mlr3tuning:AutoTuner]{mlr3tuning::AutoTuner}. +It is best if this second overload only collects the additional information not contained in the abstract base class, and also calls \code{super$.additional_phash_input()}. + +The information from 1., 2., and 3. together is contained in the \verb{$phash} value. +It is also collected automatically from the \code{private$.additional_phash_input()} function, as well as the \verb{$param_set$values} field. + +The information from 3. should be made available through the \code{additional_configuration} construction argument of \code{\link{Mlr3Component}}. +} + +\section{Cloning}{ + +\code{\link{Mlr3Component}} implements a \code{private$deep_clone()} method that automatically clones R6 objects stored directly in the object, as well as in \verb{$param_set$values}. +Because of the way the \verb{$param_set} field is handled, subclasses that need to do additional cloning should overload this function, but always call \code{super$deep_clone(name, value)} for values they do not handle. +} + +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)}) +Identifier of the object. +Used in tables, plot and text output.} + +\item{\code{label}}{(\code{character(1)}) +Human-friendly label. +Can be used in tables, plot and text output instead of the ID. +Auto-generated from the title of the help page.} + +\item{\code{packages}}{(\code{character()}) +Set of required packages. +These packages are loaded, but not attached. +Absence of these packages will generate a warning during construction.} + +\item{\code{properties}}{(\code{character()})\cr +Stores a set of properties/capabilities the object has. +These are set during construction and should not be changed afterwards. +They may be "optimistic" in the sense that the true capabilities could depend on specific configuration +parameter settings; +\verb{$properties} then indicate the capabilities under favorable configuration settings.} + +\item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}) +Set of hyperparameters.} + +\item{\code{man}}{(\code{character(1)}) +String in the format \verb{[pkg]::[class name]} pointing to a manual page for this object. +Inferred automatically from the class name and package in which the class is defined. +If a concrete class is not defined in a package, the help page of its first parent class with a help page is +used. +Can be overridden with the \verb{$override_info()} method.} + +\item{\code{hash}}{(\code{character(1)}) +Stable hash that includes id, parameter values (if present) and additional configuration settings (from +construction or class fields) but not state. +Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must +therefore be implemented by subclasses.} + +\item{\code{phash}}{(\code{character(1)}) +Hash that includes id and additional configuration settings (from construction or class fields) but not +parameter values and no state. +Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must +therefore be implemented by subclasses.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-Mlr3Component-new}{\code{Mlr3Component$new()}} +\item \href{#method-Mlr3Component-format}{\code{Mlr3Component$format()}} +\item \href{#method-Mlr3Component-print}{\code{Mlr3Component$print()}} +\item \href{#method-Mlr3Component-help}{\code{Mlr3Component$help()}} +\item \href{#method-Mlr3Component-configure}{\code{Mlr3Component$configure()}} +\item \href{#method-Mlr3Component-override_info}{\code{Mlr3Component$override_info()}} +\item \href{#method-Mlr3Component-clone}{\code{Mlr3Component$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-new}{}}} +\subsection{Method \code{new()}}{ +Construct a new \code{Mlr3Component}. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$new( + dict_entry, + dict_shortaccess, + id = dict_entry, + param_set = ps(), + packages = character(0), + properties = character(0), + additional_configuration = character(0), + representable = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{dict_entry}}{(\code{character(1)}) +The entry in the dictionary by which this component can be constructed.} + +\item{\code{dict_shortaccess}}{(\code{character(1)}) +Name of the short access function for the dictionary that this component can be found under. +\code{get(dict_shortaccess, mode = "function")(dict_entry)} should create an object of the concrete class.} + +\item{\code{id}}{(\code{character(1)}) +The ID of the constructed object. +ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in +combined \link[paradox:ParamSet]{paradox::ParamSet}s. +If instances of a given abstract subclass should all not have IDs, this should be set to \code{NULL}. +Should default to the value of \code{dict_entry} in most cases, with few exceptions for wrapper objects (e.g. a +\link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp} wrapping a \link[mlr3:Learner]{mlr3::Learner}).} + +\item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{list} | \code{NULL}) +Parameter space description. This should be created by the subclass and given to \code{super$initialize()}. +If this is a \code{\link[paradox:ParamSet]{ParamSet}}, it is used for \verb{$param_set} directly. +Otherwise it must be a \code{list} of expressions e.g. created by \code{alist()} that evaluate to +\code{\link[paradox:ParamSet]{ParamSet}}s. +These \code{\link[paradox:ParamSet]{ParamSet}} are combined using a +\code{\link[paradox:ParamSetCollection]{ParamSetCollection}}.\cr +If instances of a given abstract subclass should all not have a \link[paradox:ParamSet]{paradox::ParamSet}, this should be set to +\code{NULL}. +Otherwise, if a concrete subclass just happens to have an empty search space, the default \code{\link[paradox:ps]{paradox::ps()}} +should be used.} + +\item{\code{packages}}{(\code{character()}) +The packages required by the constructed object. +The constructor will check whether these packages can be loaded and give a warning otherwise. +The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be +provided here. +Elements of \code{packages} are deduplicated and made accessible as the \verb{$packages} field.} + +\item{\code{properties}}{(\code{character()}) +A set of properties/capabilities the object has. +These often need to be a subset of an entry in \link[mlr3:mlr_reflections]{mlr3::mlr_reflections}. +However, the \code{\link{Mlr3Component}} constructor does not check this, it needs to be asserted by an abstract +inheriting class. +Elements are deduplicated and made accessible as the \verb{$properties} field.} + +\item{\code{additional_configuration}}{(\code{character()}) +Names of class fields that constitute additional configuration settings that influence the behavior of the +component, but are neither construction argument, nor part of the \link[paradox:ParamSet]{paradox::ParamSet}. +An example is the \verb{$predict_type} field of a \link[mlr3:Learner]{mlr3::Learner}.} + +\item{\code{representable}}{(\code{logical(1)}) +Whether the object can be represented as a simple string. +Should generally be \code{TRUE} except for objects that are constructed with a large amount of data, such as +\link[mlr3:Task]{mlr3::Task}s.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-format}{}}} +\subsection{Method \code{format()}}{ +Helper for print outputs. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$format(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-print}{}}} +\subsection{Method \code{print()}}{ +Printer. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-help}{}}} +\subsection{Method \code{help()}}{ +Opens the corresponding help page referenced by field \verb{$man}. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$help()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-configure}{}}} +\subsection{Method \code{configure()}}{ +Set parameter values and fields in one step. +Named arguments overlapping with the \code{\link[paradox:ParamSet]{ParamSet}} are set as parameters; +remaining arguments are assumed to be regular fields of the object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$configure(..., .values = list())}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(named \code{any})} + +\item{\code{.values}}{(named \code{list()})} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-override_info}{}}} +\subsection{Method \code{override_info()}}{ +Override the \code{man} and \code{hash} fields. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$override_info(man = NULL, hash = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{man}}{(\code{character(1)} | \code{NULL}) +The manual page of the component.} + +\item{\code{hash}}{(\code{character(1)} | \code{NULL}) +The hash of the component.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/calculate_hash.Rd b/man/calculate_hash.Rd index e410ee62..a0709077 100644 --- a/man/calculate_hash.Rd +++ b/man/calculate_hash.Rd @@ -4,11 +4,14 @@ \alias{calculate_hash} \title{Calculate a Hash for Multiple Objects} \usage{ -calculate_hash(...) +calculate_hash(..., .list = list()) } \arguments{ \item{...}{(\code{any})\cr Objects to hash.} + +\item{.list}{(\code{list()})\cr +Additional objects to hash.} } \value{ (\code{character(1)}). diff --git a/man/hash_input.Rd b/man/hash_input.Rd index 8dc9ccc6..8654a721 100644 --- a/man/hash_input.Rd +++ b/man/hash_input.Rd @@ -5,6 +5,7 @@ \alias{hash_input.function} \alias{hash_input.data.table} \alias{hash_input.default} +\alias{hash_input.R6} \title{Hash Input} \usage{ hash_input(x) @@ -14,6 +15,8 @@ hash_input(x) \method{hash_input}{data.table}(x) \method{hash_input}{default}(x) + +\method{hash_input}{R6}(x) } \arguments{ \item{x}{(\code{any})\cr @@ -33,4 +36,7 @@ The conversion to a list ensures that keys and indices are not included in the h \item \code{hash_input(default)}: Returns the object as is. +\item \code{hash_input(R6)}: If the R6 object has a \verb{$hash} slot, it is returned. +Otherwise, the object is returned as is. + }} diff --git a/man/mlr3component_deprecation_msg.Rd b/man/mlr3component_deprecation_msg.Rd new file mode 100644 index 00000000..8a0f7bc4 --- /dev/null +++ b/man/mlr3component_deprecation_msg.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Mlr3Component.R +\name{mlr3component_deprecation_msg} +\alias{mlr3component_deprecation_msg} +\title{Deprecation Message related to the \code{Mlr3Component} Class} +\usage{ +mlr3component_deprecation_msg(msg) +} +\arguments{ +\item{msg}{(\code{character(1)}) +Message to print.} +} +\description{ +Will give different messages depending on deprecation progression and will be more agressive in tests than +interactively. +} +\keyword{internal} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 36eadc66..95b5e581 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -9,3 +9,7 @@ expect_man_exists = function(man) { checkmate::expect_data_frame(matches$matches, min.rows = 1L, info = "man page lookup") } } + +lapply(list.files(system.file("testthat", package = "mlr3misc"), pattern = "^helper.*\\.[rR]", full.names = TRUE), + source +) diff --git a/tests/testthat/test_Mlr3Component.R b/tests/testthat/test_Mlr3Component.R new file mode 100644 index 00000000..a9969fe2 --- /dev/null +++ b/tests/testthat/test_Mlr3Component.R @@ -0,0 +1,120 @@ +skip_if_not_installed("paradox") + +MiniBaseclass = R6Class("MiniBaseclass", inherit = Mlr3Component, + public = list( + initialize = function(id, param_set = ps(), properties = character(0), + packages = character(0), additional_configuration = character(0)) { + super$initialize(dict_entry = id, dict_shortaccess = "mini", + param_set = param_set, properties = properties, packages = packages, + additional_configuration = c("some_configurable", additional_configuration) + ) + private$.some_configurable = TRUE + } + ), + active = list( + some_configurable = function(value) { + if (!missing(value)) { + private$.some_configurable = value + } + private$.some_configurable + } + ), + private = list( + .some_configurable = NULL, + .additional_phash_input = function() { + list(private$.some_configurable, super$.additional_phash_input()) + } + ) +) + +mlr_mini = R6Class("DictionaryMini", inherit = Dictionary, cloneable = FALSE, +)$new() + +mini = function(.key, ...) { + dictionary_sugar_get(dict = mlr_mini, .key, ...) +} + +MiniConcrete = R6Class("MiniConcrete", inherit = MiniBaseclass, + public = list( + initialize = function(constarg) { + super$initialize(id = "concrete", + param_set = paradox::ps(x = paradox::p_dbl(0, 10, init = 2)), properties = "xyz", + packages = "data.table", # we know this one is present + additional_configuration = "some_other_configurable" + ) + private$.constarg = constarg + private$.some_other_configurable = FALSE + } + ), + active = list( + some_other_configurable = function(value) { + if (!missing(value)) { + private$.some_other_configurable = value + } + private$.some_other_configurable + }, + constarg = function(value) { + if (!missing(value)) { + private$.constarg = value + } + private$.constarg + } + ), + private = list( + .some_other_configurable = NULL, + .constarg = NULL, + .additional_phash_input = function() { + list(private$.some_other_configurable, super$.additional_phash_input()) + } + ) +) + +mlr_mini$add("concrete", MiniConcrete) + + +test_that("Mlr3Component basic tests", { + object = mini("concrete", constarg = "testvalue") + expect_equal(object$constarg, "testvalue") + expect_equal(object$some_other_configurable, FALSE) + expect_equal(object$some_configurable, TRUE) + expect_equal(object$id, "concrete") + expect_equal(object$param_set$values, list(x = 2)) + object$id = "newid" + expect_equal(object$id, "newid") + expect_string(object$hash) + expect_string(object$phash) + expect_equal(object$format(), "") + expect_output(object$print(), " \\(newid\\)") + + object$configure(x = 3, some_configurable = FALSE, id = "newerid") + expect_equal(object$some_configurable, FALSE) + expect_equal(object$id, "newerid") + expect_equal(object$param_set$values, list(x = 3)) + + # The man page is Mlr3Component, since MiniBaseclass and MiniConcrete don't have help pages + expect_equal(object$man, "mlr3misc::Mlr3Component") + expect_equal(object$packages, "data.table") + expect_equal(object$properties, "xyz") + + object$override_info(man = "mlr3misc::MiniConcrete", hash = "abcxyz") + expect_equal(object$man, "mlr3misc::MiniConcrete") + expect_equal(object$hash, "abcxyz") + expect_equal(object$phash, "abcxyz") + object$param_set$values$x = 4 + + expect_equal(object$phash, "abcxyz") + expect_true(object$hash != "abcxyz") + + object$param_set$values$x = 3 + expect_equal(object$hash, "abcxyz") + expect_equal(object$phash, "abcxyz") +}) + + +test_that_mlr3component_dict( + compclasses = list(MiniConcrete), + dict_constargs = list(MiniConcrete = list(constarg = "testconstargval")), + check_congruent_man = FALSE, # test-class does not have correct man page value + check_package_export = FALSE, # test-class is not exported + dict_package = environment() # dictionary is in this environment +) diff --git a/tests/testthat/test_encapsulate.R b/tests/testthat/test_encapsulate.R index 4f6cb965..3e7d2c21 100644 --- a/tests/testthat/test_encapsulate.R +++ b/tests/testthat/test_encapsulate.R @@ -112,7 +112,7 @@ test_that("try", { test_that("rng state is transferred", { - rng_state = .GlobalEnv$.Random.seed + rng_state = get_seed() on.exit({.GlobalEnv$.Random.seed = rng_state}) fun = function() {