From 279fbee64f4049a1973a0c9005b1bd9854dbb9b7 Mon Sep 17 00:00:00 2001 From: mb706 Date: Mon, 11 Aug 2025 15:08:56 +0200 Subject: [PATCH 01/22] first steps --- .lintr | 3 +- DESCRIPTION | 1 + NAMESPACE | 2 + R/Mlr3Component.R | 299 ++++++++++++++++++++++++++++++++++++++++++ R/calculate_hash.R | 15 ++- man/Mlr3Component.Rd | 208 +++++++++++++++++++++++++++++ man/calculate_hash.Rd | 5 +- man/hash_input.Rd | 6 + 8 files changed, 535 insertions(+), 4 deletions(-) create mode 100644 R/Mlr3Component.R create mode 100644 man/Mlr3Component.Rd diff --git a/.lintr b/.lintr index ee2ad764..44be3152 100644 --- a/.lintr +++ b/.lintr @@ -5,5 +5,6 @@ linters: linters_with_defaults( object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names cyclocomp_linter = NULL, # do not check function complexity commented_code_linter = NULL, # allow code in comments - line_length_linter = line_length_linter(120) + line_length_linter = line_length_linter(120), + indentation_linter = indentation_linter(indent = 2, hanging_indent_style = "never") ) diff --git a/DESCRIPTION b/DESCRIPTION index 93de3aa5..0f32c62d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,6 +44,7 @@ Collate: 'named_list.R' 'Callback.R' 'Context.R' + 'Mlr3Component.R' 'as_factor.R' 'as_short_string.R' 'assert_ro_binding.R' diff --git a/NAMESPACE b/NAMESPACE index 68a4f4f9..db2f05bc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ S3method(distinct_values,default) S3method(distinct_values,factor) S3method(distinct_values,logical) S3method(hash_input,"function") +S3method(hash_input,R6) S3method(hash_input,data.table) S3method(hash_input,default) S3method(insert_named,"NULL") @@ -44,6 +45,7 @@ export("get_private<-") export(Callback) export(Context) export(Dictionary) +export(Mlr3Component) export(as_callback) export(as_callbacks) export(as_factor) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R new file mode 100644 index 00000000..0544926a --- /dev/null +++ b/R/Mlr3Component.R @@ -0,0 +1,299 @@ +#' @title Common Base Class for mlr3 Components +#' +#' @description +#' A pragmatic, lightweight base class that captures common patterns across +#' the mlr3 ecosystem. +#' It standardizes fields like `id`, `label`, `param_set`, `packages`, and `man`, +#' and provides shared methods for printing, help lookup, configuration of +#' parameter values and fields, and stable hashing. +#' +#' Subclasses can specialize behavior by: +#' - Adding additional public/active fields and methods. +#' - Overriding `print()` for richer output. +#' - Extending the hash inputs via `private$.extra_hash` or by overriding +#' the `hash`/`phash` active bindings if necessary. +#' +#' @section Construction: +#' Mlr3Component$new(id, param_set = NULL, packages = character(), +#' label = NA_character_, man = NA_character_) +#' +#' - `id` (`character(1)`): Identifier of the component. +#' - `param_set` ([paradox::ParamSet] | `NULL`): Optional hyperparameter set. +#' If given, its `$values` are used by `$configure()` and are included in the hash. +#' - `packages` (`character()`): Required packages (namespaces). Not attached. +#' +#' @section Inheriting: +#' To create your own `Mlr3Component`, you need to overload the `$initialize()` function to do additional +#' initialization. The `$initialize()` method should have at least the arguments `id` and `param_set`, which should be +#' passed on to `super$initialize()` unchanged. +#' To create a class without a `param_set`, you can pass `NULL` as the `param_set` argument. +#' `packages` should have the default value `character(0)`, meaning no packages are required. +#' `properties` should have the default value `character(0)`, meaning no properties are set. +#' +#' If the `$initialize()` method has more arguments, then it is necessary to also overload the +#' `private$.additional_phash_input()` function. This function should return either all objects, or a hash of all +#' objects, that can change the function or behavior of the `Mlr3Component` and are independent +#' of the class, the id, the `$state`, and the `$param_set$values`. The last point is particularly important: +#' changing the `$param_set$values` should +#' *not* change the return value of `private$.additional_phash_input()`. +#' +#' @export +Mlr3Component = R6Class("Mlr3Component", + lock_objects = FALSE, + public = list( + #' @description + #' Construct a new `Mlr3Component`. + #' @param id (`character(1)`) + #' @param param_set ([paradox::ParamSet] | `NULL`) + #' @param packages (`character()`) + initialize = function(id, param_set = ps(), packages = character(0), properties = character(0)) { + self$id = id + + if (is.null(packages)) { + rm("packages", envir = self) + } + private$.packages = assert_character(packages, any.missing = FALSE, null.ok = TRUE) + + if (is.null(properties)) { + rm("properties", envir = self) + } + private$.properties = assert_character(properties, any.missing = FALSE, null.ok = TRUE) + + # ParamSet is optional to keep this base class generic across components + if (is.null(param_set)) { + private$.param_set = NULL + private$.param_set_source = NULL + rm("param_set", envir = self) + } else if (inherits(param_set, "ParamSet")) { + private$.param_set = assert_param_set(param_set) + private$.param_set_source = NULL + } else { + lapply(param_set, function(x) assert_param_set(eval(x))) + private$.param_set_source = param_set + } + + # upfront check; does not attach + if (length(self$packages)) { + check_packages_installed(self$packages, + msg = sprintf("Package '%%s' required but not installed for %s '%s'", + class(self)[1L], self$id)) + } + lockEnvironment(self) + lockEnvironment(private) + }, + + #' @description + #' Helper for print outputs. + #' @param ... (ignored). + format = function(...) { + sprintf("<%s:%s>", class(self)[1L], self$id) + }, + + #' @description + #' Printer with concise, unified output. + print = function(...) { + msg_h = if (is.null(self$label) || is.na(self$label)) "" else paste0(": ", self$label) + cat_cli({ + cli::cli_h1("{.cls {class(self)[1L]}} ({self$id}){msg_h}") + cli::cli_li("Parameters: {as_short_string(if (is.null(private$.param_set)) list() else private$.param_set$values, 1000L)}") + cli::cli_li("Packages: {.pkg {if (length(self$packages)) self$packages else '-'}}") + }) + }, + + #' @description + #' Opens the corresponding help page referenced by field `$man`. + help = function() { + open_help(self$man) + }, + + #' @description + #' Set parameter values and fields in one step. + #' Named arguments overlapping with the `ParamSet` are set as parameters; + #' remaining arguments are assumed to be regular fields of the object. + #' @param ... (named `any`) + #' @param .values (named `list()`) + configure = function(..., .values = list()) { + dots = list(...) + assert_list(dots, names = "unique") + assert_list(.values, names = "unique") + assert_disjunct(names(dots), names(.values)) + new_values = insert_named(dots, .values) + + # set params in ParamSet if available + if (!is.null(private$.param_set) && length(new_values)) { + param_ids = private$.param_set$ids() + ii = names(new_values) %in% param_ids + if (any(ii)) { + private$.param_set$values = insert_named(private$.param_set$values, new_values[ii]) + new_values = new_values[!ii] + } + } else { + param_ids = character() + } + + # remaining args become fields + if (length(new_values)) { + ndots = names(new_values) + for (i in seq_along(new_values)) { + nn = ndots[[i]] + if (!exists(nn, envir = self, inherits = FALSE)) { + stopf("Cannot set argument '%s' for '%s' (not a parameter, not a field).%s", + nn, class(self)[1L], did_you_mean(nn, setdiff(names(self), ".__enclos_env__"))) # nolint + } + self[[nn]] = new_values[[i]] + } + } + + invisible(self) + }, + + #' @description + #' Convenience: load (not attach) required namespaces. + #' Raises a `packageNotFoundError` if loading fails. + require_namespaces = function() { + if (length(self$packages)) require_namespaces(self$packages) + invisible(self) + } + ), + + active = list( + #' @field id (`character(1)`) + #' Identifier of the object. + #' Used in tables, plot and text output. + id = function(rhs) { + if (!missing(rhs)) { + private$.id = assert_string(rhs, min.chars = 1L) + } + private$.id + }, + + #' @field label (`character(1)`) + #' Human-friendly label. + #' Can be used in tables, plot and text output instead of the ID. + label = function(rhs) { + if (!missing(rhs)) stop("label is read-only") + if (is.null(private$.label)) { + helpinfo = self$help() + helpcontent = NULL + if (inherits(helpinfo, "help_files_with_topic") && length(helpinfo)) { + ghf = get(".getHelpFile", mode = "function", envir = getNamespace("utils")) + helpcontent = ghf(helpinfo) + } else if (inherits(helpinfo, "dev_topic")) { + helpcontent = tools::parse_Rd(helpinfo$path) + } + if (is.null(helpcontent)) { + private$.label = "LABEL COULD NOT BE RETRIEVED" + } else { + private$.label = Filter(function(x) identical(attr(x, "Rd_tag"), "\\title"), helpcontent)[[1]][[1]][1] + } + } + private$.label + }, + + #' @field packages (`character()`) + #' Set of required packages. + #' These packages are loaded, but not attached. + packages = function(rhs) { + if (!missing(rhs)) stop("packages is read-only") + private$.packages + }, + + #' @field properties (`character()`)\cr + #' Stores a set of properties/capabilities the learner has. + #' A complete list of candidate properties, grouped by task type, is stored in + #' [`mlr_reflections$learner_properties`][mlr_reflections]. + properties = function(rhs) { + if (!missing(rhs)) stop("properties is read-only") + private$.properties + }, + + #' @field param_set ([paradox::ParamSet] | `NULL`) + #' Set of hyperparameters. + param_set = function(val) { + if (is.null(private$.param_set)) { + sourcelist = lapply(private$.param_set_source, function(x) eval(x)) + if (length(sourcelist) > 1) { + private$.param_set = ParamSetCollection$new(sourcelist) + } else { + private$.param_set = sourcelist[[1]] + } + } + if (!missing(val) && !identical(val, private$.param_set)) { + stop("param_set is read-only.") + } + private$.param_set + }, + + #' @field man (`character(1)`) + #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. + man = function(rhs) { + if (!missing(rhs)) stop("man is read-only") + paste0(topenv(self$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]], "::", class(self)[[1]]) + }, + + #' @field hash (`character(1)`) + #' Stable hash that includes id, parameter values (if present) and `private$.extra_hash` fields. + hash = function(rhs) { + if (!missing(rhs)) stop("hash is read-only") + calculate_hash(class(self), self$id, .list = c(self$param_set$values, private$.additional_phash_input())) + }, + + #' @field phash (`character(1)`) + #' Stable hash excluding parameter values (for tuning / partial identity), but including `private$.extra_hash`. + phash = function(rhs) { + if (!missing(rhs)) stop("phash is read-only") + calculate_hash( + class(self), self$id, + mget(private$.extra_hash, envir = self) + ) + } + ), + + private = list( + .id = NULL, + .param_set = NULL, + .packages = NULL, + .properties = NULL, + .param_set_source = NULL, + .label = NULL, + + .additional_phash_input = function() { + if (is.null(self$initialize)) return(NULL) + initformals <- names(formals(args(self$initialize))) + if (!test_subset(initformals, c("id", "param_vals"))) { + stopf("Class %s has construction arguments besides 'id' and 'param_vals' but does not overload the private '.additional_phash_input()' function. + +The hash and phash of a class must differ when it represents a different operation; since %s has construction arguments that could change the operation that is performed by it, it is necessary for the $hash and $phash to reflect this. `.additional_phash_input()` should return all the information (e.g. hashes of encapsulated items) that should additionally be hashed; read the help of ?Mlr3Component for more information.", # nolint + class(self)[[1]], class(self)[[1]]) + } + }, + + deep_clone = function(name, value) { + if (!is.null(private$.param_set_source)) { + private$.param_set = NULL # required to keep clone identical to original, otherwise tests get really ugly + if (name == ".param_set_source") { + value = lapply(value, function(x) { + if (inherits(x, "R6")) x$clone(deep = TRUE) else x + }) + } + } + if (is.environment(value) && !is.null(value[[".__enclos_env__"]])) { + return(value$clone(deep = TRUE)) + } + value + } + ) +) + + + +# TODO +# - dict_entry: own id in the dictionary +# - dict_shortaccess: dictionary short access name +# - autotest: construction ars are also active bindings +# - autotest: manual name congruent with dictionary +# - own param set thing +# - autotests from miesmuschel +# - what to do with composite objects? +# - cloning +# - conversion objects diff --git a/R/calculate_hash.R b/R/calculate_hash.R index bb5c9887..eb6a3bd2 100644 --- a/R/calculate_hash.R +++ b/R/calculate_hash.R @@ -9,13 +9,15 @@ #' #' @param ... (`any`)\cr #' Objects to hash. +#' @param .list (`list()`)\cr +#' Additional objects to hash. #' #' @return (`character(1)`). #' @export #' @examples #' calculate_hash(iris, 1, "a") -calculate_hash = function(...) { - digest(lapply(list(...), hash_input), algo = "xxhash64") +calculate_hash = function(..., .list = list()) { + digest(lapply(c(list(...), .list), hash_input), algo = "xxhash64") } #' Hash Input @@ -54,3 +56,12 @@ hash_input.default = function(x) { x } +#' @describeIn hash_input +#' If the R6 object has a `$hash` slot, it is returned. +#' Otherwise, the object is returned as is. +#' @export +hash_input.R6 = function(x) { + # In the following we also avoid accessing `val$hash` twice, because it could + # potentially be an expensive AB. + get0("hash", x, mode = "any", inherits = FALSE, ifnotfound = x) +} diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd new file mode 100644 index 00000000..2506df03 --- /dev/null +++ b/man/Mlr3Component.Rd @@ -0,0 +1,208 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Mlr3Component.R +\name{Mlr3Component} +\alias{Mlr3Component} +\title{Common Base Class for mlr3 Components} +\description{ +A pragmatic, lightweight base class that captures common patterns across +the mlr3 ecosystem. +It standardizes fields like \code{id}, \code{label}, \code{param_set}, \code{packages}, and \code{man}, +and provides shared methods for printing, help lookup, configuration of +parameter values and fields, and stable hashing. + +Subclasses can specialize behavior by: +\itemize{ +\item Adding additional public/active fields and methods. +\item Overriding \code{print()} for richer output. +\item Extending the hash inputs via \code{private$.extra_hash} or by overriding +the \code{hash}/\code{phash} active bindings if necessary. +} +} +\section{Construction}{ + +Mlr3Component$new(id, param_set = NULL, packages = character(), +label = NA_character_, man = NA_character_) +\itemize{ +\item \code{id} (\code{character(1)}): Identifier of the component. +\item \code{param_set} (\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}): Optional hyperparameter set. +If given, its \verb{$values} are used by \verb{$configure()} and are included in the hash. +\item \code{packages} (\code{character()}): Required packages (namespaces). Not attached. +} +} + +\section{Inheriting}{ + +To create your own \code{Mlr3Component}, you need to overload the \verb{$initialize()} function to do additional +initialization. The \verb{$initialize()} method should have at least the arguments \code{id} and \code{param_set}, which should be +passed on to \code{super$initialize()} unchanged. +To create a class without a \code{param_set}, you can pass \code{NULL} as the \code{param_set} argument. +\code{packages} should have the default value \code{character(0)}, meaning no packages are required. +\code{properties} should have the default value \code{character(0)}, meaning no properties are set. + +If the \verb{$initialize()} method has more arguments, then it is necessary to also overload the +\code{private$.additional_phash_input()} function. This function should return either all objects, or a hash of all +objects, that can change the function or behavior of the \code{Mlr3Component} and are independent +of the class, the id, the \verb{$state}, and the \verb{$param_set$values}. The last point is particularly important: +changing the \verb{$param_set$values} should +\emph{not} change the return value of \code{private$.additional_phash_input()}. +} + +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)}) +Identifier of the object. +Used in tables, plot and text output.} + +\item{\code{label}}{(\code{character(1)}) +Human-friendly label. +Can be used in tables, plot and text output instead of the ID.} + +\item{\code{packages}}{(\code{character()}) +Set of required packages. +These packages are loaded, but not attached.} + +\item{\code{properties}}{(\code{character()})\cr +Stores a set of properties/capabilities the learner has. +A complete list of candidate properties, grouped by task type, is stored in +\code{\link[=mlr_reflections]{mlr_reflections$learner_properties}}.} + +\item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}) +Set of hyperparameters.} + +\item{\code{man}}{(\code{character(1)}) +String in the format \verb{[pkg]::[class name]} pointing to a manual page for this object.} + +\item{\code{hash}}{(\code{character(1)}) +Stable hash that includes id, parameter values (if present) and \code{private$.extra_hash} fields.} + +\item{\code{phash}}{(\code{character(1)}) +Stable hash excluding parameter values (for tuning / partial identity), but including \code{private$.extra_hash}.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-Mlr3Component-new}{\code{Mlr3Component$new()}} +\item \href{#method-Mlr3Component-format}{\code{Mlr3Component$format()}} +\item \href{#method-Mlr3Component-print}{\code{Mlr3Component$print()}} +\item \href{#method-Mlr3Component-help}{\code{Mlr3Component$help()}} +\item \href{#method-Mlr3Component-configure}{\code{Mlr3Component$configure()}} +\item \href{#method-Mlr3Component-require_namespaces}{\code{Mlr3Component$require_namespaces()}} +\item \href{#method-Mlr3Component-clone}{\code{Mlr3Component$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-new}{}}} +\subsection{Method \code{new()}}{ +Construct a new \code{Mlr3Component}. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$new( + id, + param_set = ps(), + packages = character(0), + properties = character(0) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)})} + +\item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL})} + +\item{\code{packages}}{(\code{character()})} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-format}{}}} +\subsection{Method \code{format()}}{ +Helper for print outputs. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$format(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-print}{}}} +\subsection{Method \code{print()}}{ +Printer with concise, unified output. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$print(...)}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-help}{}}} +\subsection{Method \code{help()}}{ +Opens the corresponding help page referenced by field \verb{$man}. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$help()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-configure}{}}} +\subsection{Method \code{configure()}}{ +Set parameter values and fields in one step. +Named arguments overlapping with the \code{ParamSet} are set as parameters; +remaining arguments are assumed to be regular fields of the object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$configure(..., .values = list())}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(named \code{any})} + +\item{\code{.values}}{(named \code{list()})} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-require_namespaces}{}}} +\subsection{Method \code{require_namespaces()}}{ +Convenience: load (not attach) required namespaces. +Raises a \code{packageNotFoundError} if loading fails. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$require_namespaces()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/calculate_hash.Rd b/man/calculate_hash.Rd index e410ee62..a0709077 100644 --- a/man/calculate_hash.Rd +++ b/man/calculate_hash.Rd @@ -4,11 +4,14 @@ \alias{calculate_hash} \title{Calculate a Hash for Multiple Objects} \usage{ -calculate_hash(...) +calculate_hash(..., .list = list()) } \arguments{ \item{...}{(\code{any})\cr Objects to hash.} + +\item{.list}{(\code{list()})\cr +Additional objects to hash.} } \value{ (\code{character(1)}). diff --git a/man/hash_input.Rd b/man/hash_input.Rd index 8dc9ccc6..8654a721 100644 --- a/man/hash_input.Rd +++ b/man/hash_input.Rd @@ -5,6 +5,7 @@ \alias{hash_input.function} \alias{hash_input.data.table} \alias{hash_input.default} +\alias{hash_input.R6} \title{Hash Input} \usage{ hash_input(x) @@ -14,6 +15,8 @@ hash_input(x) \method{hash_input}{data.table}(x) \method{hash_input}{default}(x) + +\method{hash_input}{R6}(x) } \arguments{ \item{x}{(\code{any})\cr @@ -33,4 +36,7 @@ The conversion to a list ensures that keys and indices are not included in the h \item \code{hash_input(default)}: Returns the object as is. +\item \code{hash_input(R6)}: If the R6 object has a \verb{$hash} slot, it is returned. +Otherwise, the object is returned as is. + }} From a0ae9fa8577f052743284565616859c3be3e605a Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 12 Aug 2025 14:12:29 +0200 Subject: [PATCH 02/22] progress --- NAMESPACE | 1 + R/Mlr3Component.R | 67 ++++++++++++++++++++++++++++++++----- man/Mlr3Component.Rd | 7 ++-- man/deprecated_component.Rd | 17 ++++++++++ 4 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 man/deprecated_component.Rd diff --git a/NAMESPACE b/NAMESPACE index db2f05bc..13ca6cf7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,6 +72,7 @@ export(count_missing) export(crate) export(cross_join) export(deframe) +export(deprecated_component) export(detect) export(dictionary_sugar) export(dictionary_sugar_get) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 0544926a..c3724256 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -46,18 +46,33 @@ Mlr3Component = R6Class("Mlr3Component", #' @param id (`character(1)`) #' @param param_set ([paradox::ParamSet] | `NULL`) #' @param packages (`character()`) - initialize = function(id, param_set = ps(), packages = character(0), properties = character(0)) { - self$id = id + initialize = function(dict_entry, dict_shortaccess, id = dict_entry, + param_set = ps(), packages = character(0), properties = character(0), + representable = TRUE + ) { + private$.has_id = !is.null(id) + if (private$.has_id) { + self$id = id + } else { + # if ID is not provided, it is read-only set to the dictionary entry + private$.id = dict_entry + } if (is.null(packages)) { rm("packages", envir = self) + } else { + assert_character(packages, any.missing = FALSE, min.chars = 1L) + check_packages_installed(packages, + msg = sprintf("Package '%%s' required but not installed for %s '%s'", + class(self)[1L], self$id) + ) } - private$.packages = assert_character(packages, any.missing = FALSE, null.ok = TRUE) + private$.packages = packages if (is.null(properties)) { rm("properties", envir = self) } - private$.properties = assert_character(properties, any.missing = FALSE, null.ok = TRUE) + private$.properties = unique(assert_character(properties, any.missing = FALSE, min.chars = 1L, null.ok = TRUE)) # ParamSet is optional to keep this base class generic across components if (is.null(param_set)) { @@ -86,7 +101,11 @@ Mlr3Component = R6Class("Mlr3Component", #' Helper for print outputs. #' @param ... (ignored). format = function(...) { - sprintf("<%s:%s>", class(self)[1L], self$id) + if (private$.has_id) { + sprintf("<%s:%s>", class(self)[1L], self$id) + } else { + sprintf("<%s>", class(self)[1L]) + } }, #' @description @@ -162,6 +181,9 @@ Mlr3Component = R6Class("Mlr3Component", #' Used in tables, plot and text output. id = function(rhs) { if (!missing(rhs)) { + if (!private$.has_id) { + stop("id is read-only") + } private$.id = assert_string(rhs, min.chars = 1L) } private$.id @@ -235,7 +257,7 @@ Mlr3Component = R6Class("Mlr3Component", #' Stable hash that includes id, parameter values (if present) and `private$.extra_hash` fields. hash = function(rhs) { if (!missing(rhs)) stop("hash is read-only") - calculate_hash(class(self), self$id, .list = c(self$param_set$values, private$.additional_phash_input())) + calculate_hash(class(self), self$id, .list = list(self$param_set$values, private$.additional_phash_input())) }, #' @field phash (`character(1)`) @@ -244,12 +266,13 @@ Mlr3Component = R6Class("Mlr3Component", if (!missing(rhs)) stop("phash is read-only") calculate_hash( class(self), self$id, - mget(private$.extra_hash, envir = self) + list(private$.additional_phash_input()) ) } ), private = list( + .has_id = NULL, .id = NULL, .param_set = NULL, .packages = NULL, @@ -259,6 +282,9 @@ Mlr3Component = R6Class("Mlr3Component", .additional_phash_input = function() { if (is.null(self$initialize)) return(NULL) + sc = sys.calls() + # if we are called through `super$.additional_phash_input()` we are not complaining + if (length(sc) > 1 && identical(sc[[length(sc) - 1]][[1]], quote(super$.additional_phash_input))) return(NULL) initformals <- names(formals(args(self$initialize))) if (!test_subset(initformals, c("id", "param_vals"))) { stopf("Class %s has construction arguments besides 'id' and 'param_vals' but does not overload the private '.additional_phash_input()' function. @@ -290,10 +316,35 @@ The hash and phash of a class must differ when it represents a different operati # TODO # - dict_entry: own id in the dictionary # - dict_shortaccess: dictionary short access name -# - autotest: construction ars are also active bindings +# - autotest: construction vars are also active bindings # - autotest: manual name congruent with dictionary # - own param set thing # - autotests from miesmuschel # - what to do with composite objects? # - cloning # - conversion objects +# - dict_entry NULL -> not in dictionary, constructed via as_learner etc. +# - auto packages +# - warning if dict_entry is wrong +# - but only if constructed via a package; maybe check 'mlr3'-packages first. +# - label & man deprecation + +#' @title Deprecation Message related to the `Mlr3Component` Class +#' +#' @description +#' Will give different messages depending on deprecation progression and will be more agressive in tests than +#' interactively. +#' +#' @keywords internal +#' @param msg (`character(1)`) +#' Message to print. +#' @export +deprecated_component = function(msg) { + if (!identical(getOption("mlr3.on_deprecated", "ignore"), "ignore")) { + if (identical(getOption("mlr3.on_deprecated"), "warn")) { + warning(msg) + } else { + stop(msg) + } + } +} diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index 2506df03..3a1b8fcd 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -100,10 +100,13 @@ Stable hash excluding parameter values (for tuning / partial identity), but incl Construct a new \code{Mlr3Component}. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Mlr3Component$new( - id, + dict_entry, + dict_shortaccess, + id = dict_entry, param_set = ps(), packages = character(0), - properties = character(0) + properties = character(0), + representable = TRUE )}\if{html}{\out{
}} } diff --git a/man/deprecated_component.Rd b/man/deprecated_component.Rd new file mode 100644 index 00000000..2e12c35c --- /dev/null +++ b/man/deprecated_component.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Mlr3Component.R +\name{deprecated_component} +\alias{deprecated_component} +\title{Deprecation Message related to the \code{Mlr3Component} Class} +\usage{ +deprecated_component(msg) +} +\arguments{ +\item{msg}{(\code{character(1)}) +Message to print.} +} +\description{ +Will give different messages depending on deprecation progression and will be more agressive in tests than +interactively. +} +\keyword{internal} From 531c34b07fa38f4046d467f560f05bd460e58305 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 12 Aug 2025 17:23:18 +0200 Subject: [PATCH 03/22] progress --- R/Mlr3Component.R | 78 +++++++++++++++++++++++++++++++++++--------- man/Mlr3Component.Rd | 22 +++++++++++++ 2 files changed, 84 insertions(+), 16 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index c3724256..5c2fb15b 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -39,7 +39,6 @@ #' #' @export Mlr3Component = R6Class("Mlr3Component", - lock_objects = FALSE, public = list( #' @description #' Construct a new `Mlr3Component`. @@ -58,20 +57,19 @@ Mlr3Component = R6Class("Mlr3Component", private$.id = dict_entry } - if (is.null(packages)) { - rm("packages", envir = self) - } else { + if (!is.null(packages)) { assert_character(packages, any.missing = FALSE, min.chars = 1L) check_packages_installed(packages, msg = sprintf("Package '%%s' required but not installed for %s '%s'", - class(self)[1L], self$id) - ) + class(self)[1L], self$id)) + env = self$.__enclos_env__ + while (!is.null(env)) { + packages[[length(packages) + 1]] = topenv(env)$.__NAMESPACE__.$spec[["name"]] + env = env$super$.__enclos_env__ + } + private$.packages = unique(packages[packages != "mlr3misc"]) } - private$.packages = packages - if (is.null(properties)) { - rm("properties", envir = self) - } private$.properties = unique(assert_character(properties, any.missing = FALSE, min.chars = 1L, null.ok = TRUE)) # ParamSet is optional to keep this base class generic across components @@ -93,8 +91,6 @@ Mlr3Component = R6Class("Mlr3Component", msg = sprintf("Package '%%s' required but not installed for %s '%s'", class(self)[1L], self$id)) } - lockEnvironment(self) - lockEnvironment(private) }, #' @description @@ -172,6 +168,25 @@ Mlr3Component = R6Class("Mlr3Component", require_namespaces = function() { if (length(self$packages)) require_namespaces(self$packages) invisible(self) + }, + + #' @description + #' Override the `man` and `hash` fields. + #' @param man (`character(1)` | `NULL`) + #' The manual page of the component. + #' @param hash (`character(1)` | `NULL`) + #' The hash of the component. + override_info = function(man = NULL, hash = NULL) { + if (!is.null(man)) { + private$.man = man + private$.label = NULL + } + if (!is.null(hash)) { + private$.hashmap = NULL + orig_phash = self$phash + orig_hash = self$hash + private$.hashmap = structure(c(hash, hash), names = c(orig_hash, orig_phash)) + } } ), @@ -250,24 +265,51 @@ Mlr3Component = R6Class("Mlr3Component", #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. man = function(rhs) { if (!missing(rhs)) stop("man is read-only") - paste0(topenv(self$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]], "::", class(self)[[1]]) + if (is.null(private$.man)) { + iter = 1 + env = self + repeat { + man = paste0(topenv(env$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]], "::", class(self)[[iter]]) + help_works = tryCatch({ + open_help(man) + TRUE + }, error = function(e) FALSE) + if (help_works) { + private$.man = man + break + } + iter = iter + 1 + env = env$.__enclos_env__$super + } + } + private$.man }, #' @field hash (`character(1)`) #' Stable hash that includes id, parameter values (if present) and `private$.extra_hash` fields. hash = function(rhs) { if (!missing(rhs)) stop("hash is read-only") - calculate_hash(class(self), self$id, .list = list(self$param_set$values, private$.additional_phash_input())) + hash = calculate_hash(class(self), self$id, .list = list(self$param_set$values, private$.additional_phash_input())) + if (hash %in% names(private$.hashmap)) { + private$.hashmap[[hash]] + } else { + hash + } }, #' @field phash (`character(1)`) #' Stable hash excluding parameter values (for tuning / partial identity), but including `private$.extra_hash`. phash = function(rhs) { if (!missing(rhs)) stop("phash is read-only") - calculate_hash( + hash = calculate_hash( class(self), self$id, list(private$.additional_phash_input()) ) + if (hash %in% names(private$.hashmap)) { + private$.hashmap[[hash]] + } else { + hash + } } ), @@ -279,6 +321,8 @@ Mlr3Component = R6Class("Mlr3Component", .properties = NULL, .param_set_source = NULL, .label = NULL, + .man = NULL, + .hashmap = NULL, .additional_phash_input = function() { if (is.null(self$initialize)) return(NULL) @@ -303,7 +347,7 @@ The hash and phash of a class must differ when it represents a different operati }) } } - if (is.environment(value) && !is.null(value[[".__enclos_env__"]])) { + if (is.environment(value) && ".__enclos_env__" %in% names(value) && "clone" %in% names(value)) { return(value$clone(deep = TRUE)) } value @@ -321,6 +365,7 @@ The hash and phash of a class must differ when it represents a different operati # - own param set thing # - autotests from miesmuschel # - what to do with composite objects? + # - cloning # - conversion objects # - dict_entry NULL -> not in dictionary, constructed via as_learner etc. @@ -328,6 +373,7 @@ The hash and phash of a class must differ when it represents a different operati # - warning if dict_entry is wrong # - but only if constructed via a package; maybe check 'mlr3'-packages first. # - label & man deprecation +# - what to do with non-algorithm classes (objective, resamplingresult, databackend, etc)? #' @title Deprecation Message related to the `Mlr3Component` Class #' diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index 3a1b8fcd..68356a61 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -90,6 +90,7 @@ Stable hash excluding parameter values (for tuning / partial identity), but incl \item \href{#method-Mlr3Component-help}{\code{Mlr3Component$help()}} \item \href{#method-Mlr3Component-configure}{\code{Mlr3Component$configure()}} \item \href{#method-Mlr3Component-require_namespaces}{\code{Mlr3Component$require_namespaces()}} +\item \href{#method-Mlr3Component-override_info}{\code{Mlr3Component$override_info()}} \item \href{#method-Mlr3Component-clone}{\code{Mlr3Component$clone()}} } } @@ -190,6 +191,27 @@ Raises a \code{packageNotFoundError} if loading fails. \if{html}{\out{
}}\preformatted{Mlr3Component$require_namespaces()}\if{html}{\out{
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Mlr3Component-override_info}{}}} +\subsection{Method \code{override_info()}}{ +Override the \code{man} and \code{hash} fields. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Mlr3Component$override_info(man = NULL, hash = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{man}}{(\code{character(1)} | \code{NULL}) +The manual page of the component.} + +\item{\code{hash}}{(\code{character(1)} | \code{NULL}) +The hash of the component.} +} +\if{html}{\out{
}} +} } \if{html}{\out{
}} \if{html}{\out{}} From 9983d69c4c3389d87548fd3ad7cefb4a2e266cc7 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 13 Aug 2025 14:00:11 +0200 Subject: [PATCH 04/22] deprecated writing to man / properties --- R/Mlr3Component.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 5c2fb15b..86f7f5bd 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -240,7 +240,11 @@ Mlr3Component = R6Class("Mlr3Component", #' A complete list of candidate properties, grouped by task type, is stored in #' [`mlr_reflections$learner_properties`][mlr_reflections]. properties = function(rhs) { - if (!missing(rhs)) stop("properties is read-only") + if (!missing(rhs)) { + deprecated_component("writing to properties is deprecated. Write to private$.properties if this is necessary for tests.") # nolint + # stop("properties is read-only") + private$.properties = rhs + } private$.properties }, @@ -264,7 +268,11 @@ Mlr3Component = R6Class("Mlr3Component", #' @field man (`character(1)`) #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. man = function(rhs) { - if (!missing(rhs)) stop("man is read-only") + if (!missing(rhs)) { + deprecated_component("writing to man is deprecated") + private$.man = rhs + # stop("man is read-only") + } if (is.null(private$.man)) { iter = 1 env = self From c16cac293e137930c77b80d6ff72950872f9a1d5 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 13 Aug 2025 15:24:07 +0200 Subject: [PATCH 05/22] paradox::assert_param_set --- R/Mlr3Component.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 86f7f5bd..9617164e 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -78,10 +78,10 @@ Mlr3Component = R6Class("Mlr3Component", private$.param_set_source = NULL rm("param_set", envir = self) } else if (inherits(param_set, "ParamSet")) { - private$.param_set = assert_param_set(param_set) + private$.param_set = paradox::assert_param_set(param_set) private$.param_set_source = NULL } else { - lapply(param_set, function(x) assert_param_set(eval(x))) + lapply(param_set, function(x) paradox::assert_param_set(eval(x))) private$.param_set_source = param_set } From 52c0c23ce2c832411b38002eea1f27bee4e0b4c6 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 13 Aug 2025 15:44:47 +0200 Subject: [PATCH 06/22] fix CI --- R/Mlr3Component.R | 4 +--- R/topo_sort.R | 2 +- man/Mlr3Component.Rd | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 9617164e..727358cf 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -236,9 +236,7 @@ Mlr3Component = R6Class("Mlr3Component", }, #' @field properties (`character()`)\cr - #' Stores a set of properties/capabilities the learner has. - #' A complete list of candidate properties, grouped by task type, is stored in - #' [`mlr_reflections$learner_properties`][mlr_reflections]. + #' Stores a set of properties/capabilities the object has. properties = function(rhs) { if (!missing(rhs)) { deprecated_component("writing to properties is deprecated. Write to private$.properties if this is necessary for tests.") # nolint diff --git a/R/topo_sort.R b/R/topo_sort.R index 87146427..d9bd4fd4 100644 --- a/R/topo_sort.R +++ b/R/topo_sort.R @@ -49,7 +49,7 @@ topo_sort = function(nodes) { } j = (j %% n) + 1L # inc j, but wrap around end if (j == 1L) { # we wrapped, lets remove nodes of current layer from deps - layer = nodes[.(depth_count), id, on = "depth", nomatch = NULL] + layer = nodes[list(depth_count), id, on = "depth", nomatch = NULL] if (length(layer) == 0L) { stop("Cycle detected, this is not a DAG!") } diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index 68356a61..8ca7e3ae 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -63,9 +63,7 @@ Set of required packages. These packages are loaded, but not attached.} \item{\code{properties}}{(\code{character()})\cr -Stores a set of properties/capabilities the learner has. -A complete list of candidate properties, grouped by task type, is stored in -\code{\link[=mlr_reflections]{mlr_reflections$learner_properties}}.} +Stores a set of properties/capabilities the object has.} \item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}) Set of hyperparameters.} From febc78f77d4ce6f6923c0f0812f9dfc40ab88390 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 13 Aug 2025 23:27:22 +0200 Subject: [PATCH 07/22] robuster packages inference --- R/Mlr3Component.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 727358cf..3d43178c 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -64,7 +64,10 @@ Mlr3Component = R6Class("Mlr3Component", class(self)[1L], self$id)) env = self$.__enclos_env__ while (!is.null(env)) { - packages[[length(packages) + 1]] = topenv(env)$.__NAMESPACE__.$spec[["name"]] + newpkg = topenv(env)$.__NAMESPACE__.$spec[["name"]] + if (length(newpkg) == 1L) { + packages[[length(packages) + 1]] = newpkg + } env = env$super$.__enclos_env__ } private$.packages = unique(packages[packages != "mlr3misc"]) From 98f7a0bf7ee3b8e6b8f9d9bb177d14e76ec0dad0 Mon Sep 17 00:00:00 2001 From: mb706 Date: Thu, 14 Aug 2025 00:00:33 +0200 Subject: [PATCH 08/22] tests work when no seed is set --- tests/testthat/test_encapsulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_encapsulate.R b/tests/testthat/test_encapsulate.R index 8c49d8da..5a0659d0 100644 --- a/tests/testthat/test_encapsulate.R +++ b/tests/testthat/test_encapsulate.R @@ -112,7 +112,7 @@ test_that("try", { test_that("rng state is transferred", { - rng_state = .GlobalEnv$.Random.seed + rng_state = get_seed() on.exit({.GlobalEnv$.Random.seed = rng_state}) fun = function() { From 829127e885913ffb204b381dda6dc1c937cadc1b Mon Sep 17 00:00:00 2001 From: mb706 Date: Thu, 14 Aug 2025 14:04:18 +0200 Subject: [PATCH 09/22] check that dict_entry is used --- R/Mlr3Component.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 3d43178c..f28662e1 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -49,6 +49,7 @@ Mlr3Component = R6Class("Mlr3Component", param_set = ps(), packages = character(0), properties = character(0), representable = TRUE ) { + assert_string(dict_entry) private$.has_id = !is.null(id) if (private$.has_id) { self$id = id From 5208863f0795488e6be102fabdbe384da79a69f2 Mon Sep 17 00:00:00 2001 From: mb706 Date: Thu, 14 Aug 2025 15:12:36 +0200 Subject: [PATCH 10/22] better help page inference --- R/Mlr3Component.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index f28662e1..411b7d1c 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -281,8 +281,7 @@ Mlr3Component = R6Class("Mlr3Component", repeat { man = paste0(topenv(env$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]], "::", class(self)[[iter]]) help_works = tryCatch({ - open_help(man) - TRUE + length(as.character(open_help(man))) > 0L }, error = function(e) FALSE) if (help_works) { private$.man = man From 1edefc415d3544809adde80c787d2d9af9244daf Mon Sep 17 00:00:00 2001 From: mb706 Date: Thu, 14 Aug 2025 23:31:18 +0200 Subject: [PATCH 11/22] access ParamSetCollection through paradox:: --- R/Mlr3Component.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 411b7d1c..3884f98f 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -256,7 +256,7 @@ Mlr3Component = R6Class("Mlr3Component", if (is.null(private$.param_set)) { sourcelist = lapply(private$.param_set_source, function(x) eval(x)) if (length(sourcelist) > 1) { - private$.param_set = ParamSetCollection$new(sourcelist) + private$.param_set = paradox::ParamSetCollection$new(sourcelist) } else { private$.param_set = sourcelist[[1]] } From a7410fb6c17e318561305bb04844db48d195c19d Mon Sep 17 00:00:00 2001 From: mb706 Date: Fri, 15 Aug 2025 12:07:17 +0200 Subject: [PATCH 12/22] remove duplicate package check --- R/Mlr3Component.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 3884f98f..b72a7c98 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -88,13 +88,6 @@ Mlr3Component = R6Class("Mlr3Component", lapply(param_set, function(x) paradox::assert_param_set(eval(x))) private$.param_set_source = param_set } - - # upfront check; does not attach - if (length(self$packages)) { - check_packages_installed(self$packages, - msg = sprintf("Package '%%s' required but not installed for %s '%s'", - class(self)[1L], self$id)) - } }, #' @description From bc1cf2d74438b16f3f2bdb95a3eb91f70377f749 Mon Sep 17 00:00:00 2001 From: mb706 Date: Fri, 15 Aug 2025 15:36:44 +0200 Subject: [PATCH 13/22] deprecation message rename --- NAMESPACE | 2 +- R/Mlr3Component.R | 55 +++++++++++++------------------------ man/Mlr3Component.Rd | 49 +++++++-------------------------- man/deprecated_component.Rd | 17 ------------ 4 files changed, 30 insertions(+), 93 deletions(-) delete mode 100644 man/deprecated_component.Rd diff --git a/NAMESPACE b/NAMESPACE index 13ca6cf7..0a5dda9e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,7 +72,6 @@ export(count_missing) export(crate) export(cross_join) export(deframe) -export(deprecated_component) export(detect) export(dictionary_sugar) export(dictionary_sugar_get) @@ -122,6 +121,7 @@ export(map_int) export(map_lgl) export(map_values) export(messagef) +export(mlr3component_deprecation_msg) export(mlr_callbacks) export(modify_at) export(modify_if) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index b72a7c98..47989457 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -1,31 +1,23 @@ #' @title Common Base Class for mlr3 Components #' #' @description -#' A pragmatic, lightweight base class that captures common patterns across -#' the mlr3 ecosystem. -#' It standardizes fields like `id`, `label`, `param_set`, `packages`, and `man`, -#' and provides shared methods for printing, help lookup, configuration of -#' parameter values and fields, and stable hashing. +#' A pragmatic, lightweight base class that captures common patterns across the mlr3 ecosystem. +#' It standardizes various fields and provides shared methods for printing, help lookup, setting parameter values and fields, and hashing. +#' +#' @section Inheriting: +#' To create your own `Mlr3Component`, you need to overload the `$initialize()` function. +#' A concrete class should ideally provide all arguments of the `$initialize()` directly, i.e. the user should not need to provide `id` or `param_set`. +#' +#' The information contained in a concrete mlr3 component should usually be completely determined by three things: +#' 1. The arguments given during construction. +#' These +#' +#' +#' #' -#' Subclasses can specialize behavior by: -#' - Adding additional public/active fields and methods. -#' - Overriding `print()` for richer output. -#' - Extending the hash inputs via `private$.extra_hash` or by overriding -#' the `hash`/`phash` active bindings if necessary. #' -#' @section Construction: -#' Mlr3Component$new(id, param_set = NULL, packages = character(), -#' label = NA_character_, man = NA_character_) #' -#' - `id` (`character(1)`): Identifier of the component. -#' - `param_set` ([paradox::ParamSet] | `NULL`): Optional hyperparameter set. -#' If given, its `$values` are used by `$configure()` and are included in the hash. -#' - `packages` (`character()`): Required packages (namespaces). Not attached. #' -#' @section Inheriting: -#' To create your own `Mlr3Component`, you need to overload the `$initialize()` function to do additional -#' initialization. The `$initialize()` method should have at least the arguments `id` and `param_set`, which should be -#' passed on to `super$initialize()` unchanged. #' To create a class without a `param_set`, you can pass `NULL` as the `param_set` argument. #' `packages` should have the default value `character(0)`, meaning no packages are required. #' `properties` should have the default value `character(0)`, meaning no properties are set. @@ -80,7 +72,6 @@ Mlr3Component = R6Class("Mlr3Component", if (is.null(param_set)) { private$.param_set = NULL private$.param_set_source = NULL - rm("param_set", envir = self) } else if (inherits(param_set, "ParamSet")) { private$.param_set = paradox::assert_param_set(param_set) private$.param_set_source = NULL @@ -159,14 +150,6 @@ Mlr3Component = R6Class("Mlr3Component", invisible(self) }, - #' @description - #' Convenience: load (not attach) required namespaces. - #' Raises a `packageNotFoundError` if loading fails. - require_namespaces = function() { - if (length(self$packages)) require_namespaces(self$packages) - invisible(self) - }, - #' @description #' Override the `man` and `hash` fields. #' @param man (`character(1)` | `NULL`) @@ -236,7 +219,7 @@ Mlr3Component = R6Class("Mlr3Component", #' Stores a set of properties/capabilities the object has. properties = function(rhs) { if (!missing(rhs)) { - deprecated_component("writing to properties is deprecated. Write to private$.properties if this is necessary for tests.") # nolint + mlr3component_deprecation_msg("writing to properties is deprecated. Write to private$.properties if this is necessary for tests.") # nolint # stop("properties is read-only") private$.properties = rhs } @@ -246,7 +229,7 @@ Mlr3Component = R6Class("Mlr3Component", #' @field param_set ([paradox::ParamSet] | `NULL`) #' Set of hyperparameters. param_set = function(val) { - if (is.null(private$.param_set)) { + if (is.null(private$.param_set) && !is.null(private$.param_set_source)) { sourcelist = lapply(private$.param_set_source, function(x) eval(x)) if (length(sourcelist) > 1) { private$.param_set = paradox::ParamSetCollection$new(sourcelist) @@ -264,7 +247,7 @@ Mlr3Component = R6Class("Mlr3Component", #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. man = function(rhs) { if (!missing(rhs)) { - deprecated_component("writing to man is deprecated") + mlr3component_deprecation_msg("writing to man is deprecated") private$.man = rhs # stop("man is read-only") } @@ -387,9 +370,9 @@ The hash and phash of a class must differ when it represents a different operati #' @param msg (`character(1)`) #' Message to print. #' @export -deprecated_component = function(msg) { - if (!identical(getOption("mlr3.on_deprecated", "ignore"), "ignore")) { - if (identical(getOption("mlr3.on_deprecated"), "warn")) { +mlr3component_deprecation_msg = function(msg) { + if (!identical(getOption("mlr3.on_deprecated_mlr3component", "ignore"), "ignore")) { + if (identical(getOption("mlr3.on_deprecated_mlr3component"), "warn")) { warning(msg) } else { stop(msg) diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index 8ca7e3ae..b9db10cc 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -4,37 +4,20 @@ \alias{Mlr3Component} \title{Common Base Class for mlr3 Components} \description{ -A pragmatic, lightweight base class that captures common patterns across -the mlr3 ecosystem. -It standardizes fields like \code{id}, \code{label}, \code{param_set}, \code{packages}, and \code{man}, -and provides shared methods for printing, help lookup, configuration of -parameter values and fields, and stable hashing. - -Subclasses can specialize behavior by: -\itemize{ -\item Adding additional public/active fields and methods. -\item Overriding \code{print()} for richer output. -\item Extending the hash inputs via \code{private$.extra_hash} or by overriding -the \code{hash}/\code{phash} active bindings if necessary. +A pragmatic, lightweight base class that captures common patterns across the mlr3 ecosystem. +It standardizes various fields and provides shared methods for printing, help lookup, setting parameter values and fields, and hashing. } -} -\section{Construction}{ +\section{Inheriting}{ -Mlr3Component$new(id, param_set = NULL, packages = character(), -label = NA_character_, man = NA_character_) -\itemize{ -\item \code{id} (\code{character(1)}): Identifier of the component. -\item \code{param_set} (\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}): Optional hyperparameter set. -If given, its \verb{$values} are used by \verb{$configure()} and are included in the hash. -\item \code{packages} (\code{character()}): Required packages (namespaces). Not attached. -} -} +To create your own \code{Mlr3Component}, you need to overload the \verb{$initialize()} function. +A concrete class should ideally provide all arguments of the \verb{$initialize()} directly, i.e. the user should not need to provide \code{id} or \code{param_set}. -\section{Inheriting}{ +The information contained in a concrete mlr3 component should usually be completely determined by three things: +\enumerate{ +\item The arguments given during construction. +These +} -To create your own \code{Mlr3Component}, you need to overload the \verb{$initialize()} function to do additional -initialization. The \verb{$initialize()} method should have at least the arguments \code{id} and \code{param_set}, which should be -passed on to \code{super$initialize()} unchanged. To create a class without a \code{param_set}, you can pass \code{NULL} as the \code{param_set} argument. \code{packages} should have the default value \code{character(0)}, meaning no packages are required. \code{properties} should have the default value \code{character(0)}, meaning no properties are set. @@ -87,7 +70,6 @@ Stable hash excluding parameter values (for tuning / partial identity), but incl \item \href{#method-Mlr3Component-print}{\code{Mlr3Component$print()}} \item \href{#method-Mlr3Component-help}{\code{Mlr3Component$help()}} \item \href{#method-Mlr3Component-configure}{\code{Mlr3Component$configure()}} -\item \href{#method-Mlr3Component-require_namespaces}{\code{Mlr3Component$require_namespaces()}} \item \href{#method-Mlr3Component-override_info}{\code{Mlr3Component$override_info()}} \item \href{#method-Mlr3Component-clone}{\code{Mlr3Component$clone()}} } @@ -178,17 +160,6 @@ remaining arguments are assumed to be regular fields of the object. } \if{html}{\out{}} } -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Mlr3Component-require_namespaces}{}}} -\subsection{Method \code{require_namespaces()}}{ -Convenience: load (not attach) required namespaces. -Raises a \code{packageNotFoundError} if loading fails. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Mlr3Component$require_namespaces()}\if{html}{\out{
}} -} - } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/deprecated_component.Rd b/man/deprecated_component.Rd deleted file mode 100644 index 2e12c35c..00000000 --- a/man/deprecated_component.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Mlr3Component.R -\name{deprecated_component} -\alias{deprecated_component} -\title{Deprecation Message related to the \code{Mlr3Component} Class} -\usage{ -deprecated_component(msg) -} -\arguments{ -\item{msg}{(\code{character(1)}) -Message to print.} -} -\description{ -Will give different messages depending on deprecation progression and will be more agressive in tests than -interactively. -} -\keyword{internal} From 9aafec5cc508daab0143e6fed9a60e021c57bcca Mon Sep 17 00:00:00 2001 From: mb706 Date: Fri, 15 Aug 2025 17:56:54 +0200 Subject: [PATCH 14/22] docs --- R/Mlr3Component.R | 38 +++++++++++++++++++++++++--- man/Mlr3Component.Rd | 34 ++++++++++++++++++++++--- man/mlr3component_deprecation_msg.Rd | 17 +++++++++++++ 3 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 man/mlr3component_deprecation_msg.Rd diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 47989457..0b449ab2 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -4,13 +4,43 @@ #' A pragmatic, lightweight base class that captures common patterns across the mlr3 ecosystem. #' It standardizes various fields and provides shared methods for printing, help lookup, setting parameter values and fields, and hashing. #' +#' Semantically, an mlr3 component is usually an object representing an algorithm or a method, such as a [mlr3::Learner] or a [mlr3::Measure]. +#' This algorithm can be configured though its parameters, accessible as the [paradox::ParamSet] in the `$param_set` field, as well as +#' through various other custom algorithm-specific fields. +#' All of these together can be changed via the `$configure()` method. +#' Some components, such as prominently the [mlr3::Learner], also have "state", such as the learned model. +#' +#' The identity of an object represented by an [`Mlr3Component`] is sometimes important, for example when aggregating benchmark results accross various settings of different algorithms used in tha benchmark, such as different [mlr3::Learner]s or different [mlr3::Measure]. +#' For this,the component provides a `$hash` field, which should identify the algorithm and its configuration, without including the state. +#' There is also the `$phash` field, which identifies the algorithm without its `$param_set` configuration -- this is used when aggregating benchmark results for individual algorithms when these algorithms were evaluated for different configuration parameter settings. +#' +#' [`Mlr3Component`]s should usually be constructed from a [Dictionary], which should be accessible via a short-form, such as [mlr3::lrn] or [mlr3pipelines::po]. +#' #' @section Inheriting: #' To create your own `Mlr3Component`, you need to overload the `$initialize()` function. #' A concrete class should ideally provide all arguments of the `$initialize()` directly, i.e. the user should not need to provide `id` or `param_set`. #' -#' The information contained in a concrete mlr3 component should usually be completely determined by three things: -#' 1. The arguments given during construction. -#' These +#' The information contained in a concrete mlr3 component should usually be completely determined by four things: +#' +#' 1. The *construction arguments* given to the `$initialize()` method of the concrete class. +#' These can be [`Mlr3Component`]s themselves, or configuration options that do not naturally fit into the [paradox::ParamSet]. +#' These arguments should *not* overlap with the [paradox::ParamSet] parameters, and should not be the construction arguments of the +#' abstract [Mlr3Component] base class such as `id` or `packages`. +#' 2. The *configuration arguments* inside `$param_set$values`. +#' 3. Additional *configuration settings* that influence the behavior of the component, but are not part of the [paradox::ParamSet] because they do not naturally constitute a dimension that could be optimized. +#' 4. Some additional *state information*, storing the result of the algorithm, such as the learned model, often contained in a field called `$state`. +#' +#' Information from 1. should also be made accessible as active bindings, with the same name as the construction arguments. +#' +#' The information from 1., and 3. is contained in the `$phash` value. +#' For this, the `private$.additional_phash_input()` function needs to be overloaded by subclasses. +#' It is often sufficient for an abstract subclass to implement this, and concrete classes to inherit from this. E.g. the [mlr3::Learner] class implements `$private$.additional_phash_input()` to return the necessary iformation to be included in the `$phash` for almost all possible [mlr3::Learner]s. +#' Only concrete [mlr3::Learner]s that contain additional information not contained in one of the standard fields needs to overload the function again, such as e.g. [mlr3tuning::AutoTuner]. +#' It is best if this second overload only collects the additional information not contained in the abstract base class, and also calls `super$.additional_phash_input()`. +#' +#' The information from 1., 2., and 3. together is contained in the `$phash` value. +#' It is also collected automatically from the `private$.additional_phash_input()` function, as well as the `$param_set$values` field. +#' #' #' #' @@ -274,7 +304,7 @@ Mlr3Component = R6Class("Mlr3Component", #' Stable hash that includes id, parameter values (if present) and `private$.extra_hash` fields. hash = function(rhs) { if (!missing(rhs)) stop("hash is read-only") - hash = calculate_hash(class(self), self$id, .list = list(self$param_set$values, private$.additional_phash_input())) + hash = calculate_hash(class(self), self$id, .list = c(self$param_set$values, private$.additional_phash_input())) if (hash %in% names(private$.hashmap)) { private$.hashmap[[hash]] } else { diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index b9db10cc..a7c014d6 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -6,18 +6,46 @@ \description{ A pragmatic, lightweight base class that captures common patterns across the mlr3 ecosystem. It standardizes various fields and provides shared methods for printing, help lookup, setting parameter values and fields, and hashing. + +Semantically, an mlr3 component is usually an object representing an algorithm or a method, such as a \link[mlr3:Learner]{mlr3::Learner} or a \link[mlr3:Measure]{mlr3::Measure}. +This algorithm can be configured though its parameters, accessible as the \link[paradox:ParamSet]{paradox::ParamSet} in the \verb{$param_set} field, as well as +through various other custom algorithm-specific fields. +All of these together can be changed via the \verb{$configure()} method. +Some components, such as prominently the \link[mlr3:Learner]{mlr3::Learner}, also have "state", such as the learned model. + +The identity of an object represented by an \code{\link{Mlr3Component}} is sometimes important, for example when aggregating benchmark results accross various settings of different algorithms used in tha benchmark, such as different \link[mlr3:Learner]{mlr3::Learner}s or different \link[mlr3:Measure]{mlr3::Measure}. +For this,the component provides a \verb{$hash} field, which should identify the algorithm and its configuration, without including the state. +There is also the \verb{$phash} field, which identifies the algorithm without its \verb{$param_set} configuration -- this is used when aggregating benchmark results for individual algorithms when these algorithms were evaluated for different configuration parameter settings. + +\code{\link{Mlr3Component}}s should usually be constructed from a \link{Dictionary}, which should be accessible via a short-form, such as \link[mlr3:mlr_sugar]{mlr3::lrn} or \link[mlr3pipelines:po]{mlr3pipelines::po}. } \section{Inheriting}{ To create your own \code{Mlr3Component}, you need to overload the \verb{$initialize()} function. A concrete class should ideally provide all arguments of the \verb{$initialize()} directly, i.e. the user should not need to provide \code{id} or \code{param_set}. -The information contained in a concrete mlr3 component should usually be completely determined by three things: +The information contained in a concrete mlr3 component should usually be completely determined by four things: \enumerate{ -\item The arguments given during construction. -These +\item The \emph{construction arguments} given to the \verb{$initialize()} method of the concrete class. +These can be \code{\link{Mlr3Component}}s themselves, or configuration options that do not naturally fit into the \link[paradox:ParamSet]{paradox::ParamSet}. +These arguments should \emph{not} overlap with the \link[paradox:ParamSet]{paradox::ParamSet} parameters, and should not be the construction arguments of the +abstract \link{Mlr3Component} base class such as \code{id} or \code{packages}. +\item The \emph{configuration arguments} inside \verb{$param_set$values}. +\item Additional \emph{configuration settings} that influence the behavior of the component, but are not part of the \link[paradox:ParamSet]{paradox::ParamSet} because they do not naturally constitute a dimension that could be optimized. +\item Some additional \emph{state information}, storing the result of the algorithm, such as the learned model, often contained in a field called \verb{$state}. } +Information from 1. should also be made accessible as active bindings, with the same name as the construction arguments. + +The information from 1., and 3. is contained in the \verb{$phash} value. +For this, the \code{private$.additional_phash_input()} function needs to be overloaded by subclasses. +It is often sufficient for an abstract subclass to implement this, and concrete classes to inherit from this. E.g. the \link[mlr3:Learner]{mlr3::Learner} class implements \verb{$private$.additional_phash_input()} to return the necessary iformation to be included in the \verb{$phash} for almost all possible \link[mlr3:Learner]{mlr3::Learner}s. +Only concrete \link[mlr3:Learner]{mlr3::Learner}s that contain additional information not contained in one of the standard fields needs to overload the function again, such as e.g. \link[mlr3tuning:AutoTuner]{mlr3tuning::AutoTuner}. +It is best if this second overload only collects the additional information not contained in the abstract base class, and also calls \code{super$.additional_phash_input()}. + +The information from 1., 2., and 3. together is contained in the \verb{$phash} value. +It is also collected automatically from the \code{private$.additional_phash_input()} function, as well as the \verb{$param_set$values} field. + To create a class without a \code{param_set}, you can pass \code{NULL} as the \code{param_set} argument. \code{packages} should have the default value \code{character(0)}, meaning no packages are required. \code{properties} should have the default value \code{character(0)}, meaning no properties are set. diff --git a/man/mlr3component_deprecation_msg.Rd b/man/mlr3component_deprecation_msg.Rd new file mode 100644 index 00000000..8a0f7bc4 --- /dev/null +++ b/man/mlr3component_deprecation_msg.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Mlr3Component.R +\name{mlr3component_deprecation_msg} +\alias{mlr3component_deprecation_msg} +\title{Deprecation Message related to the \code{Mlr3Component} Class} +\usage{ +mlr3component_deprecation_msg(msg) +} +\arguments{ +\item{msg}{(\code{character(1)}) +Message to print.} +} +\description{ +Will give different messages depending on deprecation progression and will be more agressive in tests than +interactively. +} +\keyword{internal} From b67c9386a2e7117924e5db39c4b40508cd398227 Mon Sep 17 00:00:00 2001 From: mb706 Date: Fri, 15 Aug 2025 19:36:26 +0200 Subject: [PATCH 15/22] documentation --- R/Mlr3Component.R | 83 ++++++++++++++++++++++++++++++---------- man/Mlr3Component.Rd | 90 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 133 insertions(+), 40 deletions(-) diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 0b449ab2..fe18f673 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -41,34 +41,54 @@ #' The information from 1., 2., and 3. together is contained in the `$phash` value. #' It is also collected automatically from the `private$.additional_phash_input()` function, as well as the `$param_set$values` field. #' +#' The information from 3. should be made available through the `additional_configuration` construction argument of [`Mlr3Component`]. #' +#' @section Cloning: +#' [`Mlr3Component`] implements a `private$deep_clone()` method that automatically clones R6 objects stored directly in the object, as well as in `$param_set$values`. +#' Because of the way the `$param_set` field is handled, subclasses that need to do additional cloning should overload this function, but always call `super$deep_clone(name, value)` for values they do not handle. #' #' -#' -#' -#' -#' -#' To create a class without a `param_set`, you can pass `NULL` as the `param_set` argument. -#' `packages` should have the default value `character(0)`, meaning no packages are required. -#' `properties` should have the default value `character(0)`, meaning no properties are set. -#' -#' If the `$initialize()` method has more arguments, then it is necessary to also overload the -#' `private$.additional_phash_input()` function. This function should return either all objects, or a hash of all -#' objects, that can change the function or behavior of the `Mlr3Component` and are independent -#' of the class, the id, the `$state`, and the `$param_set$values`. The last point is particularly important: -#' changing the `$param_set$values` should -#' *not* change the return value of `private$.additional_phash_input()`. -#' #' @export Mlr3Component = R6Class("Mlr3Component", public = list( #' @description #' Construct a new `Mlr3Component`. + #' @param dict_entry (`character(1)`) + #' The entry in the dictionary by which this component can be constructed. + #' @param dict_shortaccess (`character(1)`) + #' Name of the short access function for the dictionary that this component can be found under. + #' `get(dict_shortaccess, mode = "function")(dict_entry)` should create an object of the concrete class. #' @param id (`character(1)`) - #' @param param_set ([paradox::ParamSet] | `NULL`) + #' The ID of the constructed object. + #' ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in combined [paradox::ParamSet]s. + #' If instances of a given abstract subclass should all not have IDs, this should be set to `NULL`. + #' Should default to the value of `dict_entry` in most cases, with few exceptions for wrapper objects (e.g. a [mlr3pipelines::PipeOp] wrapping a [mlr3::Learner]). + #' @param param_set ([paradox::ParamSet] | `list` | `NULL`) + #' Parameter space description. This should be created by the subclass and given to `super$initialize()`. + #' If this is a [`ParamSet`][paradox::ParamSet], it is used for `$param_set` directly. + #' Otherwise it must be a `list` of expressions e.g. created by `alist()` that evaluate to [`ParamSet`][paradox::ParamSet]s. + #' These [`ParamSet`][paradox::ParamSet] are combined using a [`ParamSetCollection`][paradox::ParamSetCollection].\cr + #' If instances of a given abstract subclass should all not have a [paradox::ParamSet], this should be set to `NULL`. + #' Otherwise, if a concrete subclass just happens to have an empty search space, the default [paradox::ps()] should be used. #' @param packages (`character()`) + #' The packages required by the constructed object. + #' The constructor will check whether these packages can be loaded and give a warning otherwise. + #' The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be provided here. + #' Elements of `packages` are deduplicated and made accessible as the `$packages` field. + #' @param properties (`character()`) + #' A set of properties/capabilities the object has. + #' These often need to be a subset of an entry in [mlr3::mlr_reflections]. + #' However, the [`Mlr3Component`] constructor does not check this, it needs to be asserted by an abstract inheriting class. + #' Elements are deduplicated and made accessible as the `$properties` field. + #' @param additional_configuration (`character()`) + #' Names of class fields that constitute additional configuration settings that influence the behavior of the component, but are neither construction argument, nor part of the [paradox::ParamSet]. + #' An example is the `$predict_type` field of a [mlr3::Learner]. + #' @param representable (`logical(1)`) + #' Whether the object can be represented as a simple string. + #' Should generally be `TRUE` except for objects that are constructed with a large amount of data, such as [mlr3::Task]s. initialize = function(dict_entry, dict_shortaccess, id = dict_entry, param_set = ps(), packages = character(0), properties = character(0), + additional_configuration = character(0), representable = TRUE ) { assert_string(dict_entry) @@ -109,6 +129,17 @@ Mlr3Component = R6Class("Mlr3Component", lapply(param_set, function(x) paradox::assert_param_set(eval(x))) private$.param_set_source = param_set } + + assert_character(additional_configuration, any.missing = FALSE, min.chars = 1L, unique = TRUE) + assert_subset(additional_configuration, names(self)) + assert_disjunct(additional_configuration, c( + # additional configuration can not be: + # (1) a parameter, (2) a construction argument (these are captured automatically), (3) a standard field + if (!is.null(self$param_set)) self$param_set$ids(), + names(formals(self$initialize)), + "id", "label", "param_set", "packages", "properties", "format", "print", "help", "configure", "override_info", "man", "hash", "phash" + )) + private$.additional_configuration = additional_configuration }, #' @description @@ -123,7 +154,8 @@ Mlr3Component = R6Class("Mlr3Component", }, #' @description - #' Printer with concise, unified output. + #' Printer. + #' @param ... (ignored). print = function(...) { msg_h = if (is.null(self$label) || is.na(self$label)) "" else paste0(": ", self$label) cat_cli({ @@ -141,7 +173,7 @@ Mlr3Component = R6Class("Mlr3Component", #' @description #' Set parameter values and fields in one step. - #' Named arguments overlapping with the `ParamSet` are set as parameters; + #' Named arguments overlapping with the [`ParamSet`][paradox::ParamSet] are set as parameters; #' remaining arguments are assumed to be regular fields of the object. #' @param ... (named `any`) #' @param .values (named `list()`) @@ -217,6 +249,7 @@ Mlr3Component = R6Class("Mlr3Component", #' @field label (`character(1)`) #' Human-friendly label. #' Can be used in tables, plot and text output instead of the ID. + #' Auto-generated from the title of the help page. label = function(rhs) { if (!missing(rhs)) stop("label is read-only") if (is.null(private$.label)) { @@ -240,6 +273,7 @@ Mlr3Component = R6Class("Mlr3Component", #' @field packages (`character()`) #' Set of required packages. #' These packages are loaded, but not attached. + #' Absence of these packages will generate a warning during construction. packages = function(rhs) { if (!missing(rhs)) stop("packages is read-only") private$.packages @@ -247,6 +281,9 @@ Mlr3Component = R6Class("Mlr3Component", #' @field properties (`character()`)\cr #' Stores a set of properties/capabilities the object has. + #' These are set during construction and should not be changed afterwards. + #' They may be "optimistic" in the sense that the true capabilities could depend on specific configuration parameter settings; + #' `$properties` then indicate the capabilities under favorable configuration settings. properties = function(rhs) { if (!missing(rhs)) { mlr3component_deprecation_msg("writing to properties is deprecated. Write to private$.properties if this is necessary for tests.") # nolint @@ -275,6 +312,9 @@ Mlr3Component = R6Class("Mlr3Component", #' @field man (`character(1)`) #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. + #' Inferred automatically from the class name and package in which the class is defined. + #' If a concrete class is not defined in a package, the help page of its first parent class with a help page is used. + #' Can be overridden with the `$override_info()` method. man = function(rhs) { if (!missing(rhs)) { mlr3component_deprecation_msg("writing to man is deprecated") @@ -301,7 +341,8 @@ Mlr3Component = R6Class("Mlr3Component", }, #' @field hash (`character(1)`) - #' Stable hash that includes id, parameter values (if present) and `private$.extra_hash` fields. + #' Stable hash that includes id, parameter values (if present) and additional configuration settings (from construction or class fields) but not state. + #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must therefore be implemented by subclasses. hash = function(rhs) { if (!missing(rhs)) stop("hash is read-only") hash = calculate_hash(class(self), self$id, .list = c(self$param_set$values, private$.additional_phash_input())) @@ -313,7 +354,8 @@ Mlr3Component = R6Class("Mlr3Component", }, #' @field phash (`character(1)`) - #' Stable hash excluding parameter values (for tuning / partial identity), but including `private$.extra_hash`. + #' Hash that includes id and additional configuration settings (from construction or class fields) but not parameter values and no state. + #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must therefore be implemented by subclasses. phash = function(rhs) { if (!missing(rhs)) stop("phash is read-only") hash = calculate_hash( @@ -338,6 +380,7 @@ Mlr3Component = R6Class("Mlr3Component", .label = NULL, .man = NULL, .hashmap = NULL, + .additional_configuration = NULL, .additional_phash_input = function() { if (is.null(self$initialize)) return(NULL) diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index a7c014d6..99584bb8 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -46,16 +46,13 @@ It is best if this second overload only collects the additional information not The information from 1., 2., and 3. together is contained in the \verb{$phash} value. It is also collected automatically from the \code{private$.additional_phash_input()} function, as well as the \verb{$param_set$values} field. -To create a class without a \code{param_set}, you can pass \code{NULL} as the \code{param_set} argument. -\code{packages} should have the default value \code{character(0)}, meaning no packages are required. -\code{properties} should have the default value \code{character(0)}, meaning no properties are set. +The information from 3. should be made available through the \code{additional_configuration} construction argument of \code{\link{Mlr3Component}}. +} + +\section{Cloning}{ -If the \verb{$initialize()} method has more arguments, then it is necessary to also overload the -\code{private$.additional_phash_input()} function. This function should return either all objects, or a hash of all -objects, that can change the function or behavior of the \code{Mlr3Component} and are independent -of the class, the id, the \verb{$state}, and the \verb{$param_set$values}. The last point is particularly important: -changing the \verb{$param_set$values} should -\emph{not} change the return value of \code{private$.additional_phash_input()}. +\code{\link{Mlr3Component}} implements a \code{private$deep_clone()} method that automatically clones R6 objects stored directly in the object, as well as in \verb{$param_set$values}. +Because of the way the \verb{$param_set} field is handled, subclasses that need to do additional cloning should overload this function, but always call \code{super$deep_clone(name, value)} for values they do not handle. } \section{Active bindings}{ @@ -67,26 +64,36 @@ Used in tables, plot and text output.} \item{\code{label}}{(\code{character(1)}) Human-friendly label. -Can be used in tables, plot and text output instead of the ID.} +Can be used in tables, plot and text output instead of the ID. +Auto-generated from the title of the help page.} \item{\code{packages}}{(\code{character()}) Set of required packages. -These packages are loaded, but not attached.} +These packages are loaded, but not attached. +Absence of these packages will generate a warning during construction.} \item{\code{properties}}{(\code{character()})\cr -Stores a set of properties/capabilities the object has.} +Stores a set of properties/capabilities the object has. +These are set during construction and should not be changed afterwards. +They may be "optimistic" in the sense that the true capabilities could depend on specific configuration parameter settings; +\verb{$properties} then indicate the capabilities under favorable configuration settings.} \item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}) Set of hyperparameters.} \item{\code{man}}{(\code{character(1)}) -String in the format \verb{[pkg]::[class name]} pointing to a manual page for this object.} +String in the format \verb{[pkg]::[class name]} pointing to a manual page for this object. +Inferred automatically from the class name and package in which the class is defined. +If a concrete class is not defined in a package, the help page of its first parent class with a help page is used. +Can be overridden with the \verb{$override_info()} method.} \item{\code{hash}}{(\code{character(1)}) -Stable hash that includes id, parameter values (if present) and \code{private$.extra_hash} fields.} +Stable hash that includes id, parameter values (if present) and additional configuration settings (from construction or class fields) but not state. +Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must therefore be implemented by subclasses.} \item{\code{phash}}{(\code{character(1)}) -Stable hash excluding parameter values (for tuning / partial identity), but including \code{private$.extra_hash}.} +Hash that includes id and additional configuration settings (from construction or class fields) but not parameter values and no state. +Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must therefore be implemented by subclasses.} } \if{html}{\out{}} } @@ -115,6 +122,7 @@ Construct a new \code{Mlr3Component}. param_set = ps(), packages = character(0), properties = character(0), + additional_configuration = character(0), representable = TRUE )}\if{html}{\out{}} } @@ -122,11 +130,46 @@ Construct a new \code{Mlr3Component}. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{id}}{(\code{character(1)})} +\item{\code{dict_entry}}{(\code{character(1)}) +The entry in the dictionary by which this component can be constructed.} + +\item{\code{dict_shortaccess}}{(\code{character(1)}) +Name of the short access function for the dictionary that this component can be found under. +\code{get(dict_shortaccess, mode = "function")(dict_entry)} should create an object of the concrete class.} + +\item{\code{id}}{(\code{character(1)}) +The ID of the constructed object. +ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in combined \link[paradox:ParamSet]{paradox::ParamSet}s. +If instances of a given abstract subclass should all not have IDs, this should be set to \code{NULL}. +Should default to the value of \code{dict_entry} in most cases, with few exceptions for wrapper objects (e.g. a \link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp} wrapping a \link[mlr3:Learner]{mlr3::Learner}).} + +\item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{list} | \code{NULL}) +Parameter space description. This should be created by the subclass and given to \code{super$initialize()}. +If this is a \code{\link[paradox:ParamSet]{ParamSet}}, it is used for \verb{$param_set} directly. +Otherwise it must be a \code{list} of expressions e.g. created by \code{alist()} that evaluate to \code{\link[paradox:ParamSet]{ParamSet}}s. +These \code{\link[paradox:ParamSet]{ParamSet}} are combined using a \code{\link[paradox:ParamSetCollection]{ParamSetCollection}}.\cr +If instances of a given abstract subclass should all not have a \link[paradox:ParamSet]{paradox::ParamSet}, this should be set to \code{NULL}. +Otherwise, if a concrete subclass just happens to have an empty search space, the default \code{\link[paradox:ps]{paradox::ps()}} should be used.} + +\item{\code{packages}}{(\code{character()}) +The packages required by the constructed object. +The constructor will check whether these packages can be loaded and give a warning otherwise. +The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be provided here. +Elements of \code{packages} are deduplicated and made accessible as the \verb{$packages} field.} -\item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL})} +\item{\code{properties}}{(\code{character()}) +A set of properties/capabilities the object has. +These often need to be a subset of an entry in \link[mlr3:mlr_reflections]{mlr3::mlr_reflections}. +However, the \code{\link{Mlr3Component}} constructor does not check this, it needs to be asserted by an abstract inheriting class. +Elements are deduplicated and made accessible as the \verb{$properties} field.} -\item{\code{packages}}{(\code{character()})} +\item{\code{additional_configuration}}{(\code{character()}) +Names of class fields that constitute additional configuration settings that influence the behavior of the component, but are neither construction argument, nor part of the \link[paradox:ParamSet]{paradox::ParamSet}. +An example is the \verb{$predict_type} field of a \link[mlr3:Learner]{mlr3::Learner}.} + +\item{\code{representable}}{(\code{logical(1)}) +Whether the object can be represented as a simple string. +Should generally be \code{TRUE} except for objects that are constructed with a large amount of data, such as \link[mlr3:Task]{mlr3::Task}s.} } \if{html}{\out{
}} } @@ -152,11 +195,18 @@ Helper for print outputs. \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Mlr3Component-print}{}}} \subsection{Method \code{print()}}{ -Printer with concise, unified output. +Printer. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Mlr3Component$print(...)}\if{html}{\out{
}} } +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} } \if{html}{\out{
}} \if{html}{\out{}} @@ -173,7 +223,7 @@ Opens the corresponding help page referenced by field \verb{$man}. \if{latex}{\out{\hypertarget{method-Mlr3Component-configure}{}}} \subsection{Method \code{configure()}}{ Set parameter values and fields in one step. -Named arguments overlapping with the \code{ParamSet} are set as parameters; +Named arguments overlapping with the \code{\link[paradox:ParamSet]{ParamSet}} are set as parameters; remaining arguments are assumed to be regular fields of the object. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Mlr3Component$configure(..., .values = list())}\if{html}{\out{
}} From 1daa85b2c31b203c5e409ada6e599a3b563b023e Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 11:40:42 +0200 Subject: [PATCH 16/22] first try at autotest --- R/Mlr3Component.R | 25 +- inst/testthat/helper_test_mlr3component.R | 422 ++++++++++++++++++++++ 2 files changed, 426 insertions(+), 21 deletions(-) create mode 100644 inst/testthat/helper_test_mlr3component.R diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index fe18f673..3c878051 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -91,7 +91,8 @@ Mlr3Component = R6Class("Mlr3Component", additional_configuration = character(0), representable = TRUE ) { - assert_string(dict_entry) + private$.dict_entry = assert_string(dict_entry) + private$.dict_shortaccess = assert_string(dict_shortaccess) private$.has_id = !is.null(id) if (private$.has_id) { self$id = id @@ -371,6 +372,8 @@ Mlr3Component = R6Class("Mlr3Component", ), private = list( + .dict_entry = NULL, + .dict_shortaccess = NULL, .has_id = NULL, .id = NULL, .param_set = NULL, @@ -413,26 +416,6 @@ The hash and phash of a class must differ when it represents a different operati ) ) - - -# TODO -# - dict_entry: own id in the dictionary -# - dict_shortaccess: dictionary short access name -# - autotest: construction vars are also active bindings -# - autotest: manual name congruent with dictionary -# - own param set thing -# - autotests from miesmuschel -# - what to do with composite objects? - -# - cloning -# - conversion objects -# - dict_entry NULL -> not in dictionary, constructed via as_learner etc. -# - auto packages -# - warning if dict_entry is wrong -# - but only if constructed via a package; maybe check 'mlr3'-packages first. -# - label & man deprecation -# - what to do with non-algorithm classes (objective, resamplingresult, databackend, etc)? - #' @title Deprecation Message related to the `Mlr3Component` Class #' #' @description diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R new file mode 100644 index 00000000..06b9b137 --- /dev/null +++ b/inst/testthat/helper_test_mlr3component.R @@ -0,0 +1,422 @@ +#' @title Mlr3Component Autotest Suite +#' +#' @description +#' Autotests for [`Mlr3Component`] subclasses. +#' +#' @details +#' Run `expect_mlr3component_subclass()` that verify various assumptions that subclasses of [`Mlr3Component`] should +#' fulfill. +#' +#' @param compclass (`character(1)`) +#' The class of the component to test. +#' @param constargs (`list`) +#' A list of construction arguments to pass to the component. +#' @param expect_congruent_man (`logical(1)`) +#' Whether to expect the `man` field to be of the form `package::_`. +#' @param check_package_export (`logical(1)`) +#' Whether to check that the component is exported from the package. +#' @param dict_package (`character(1)` | `environment`) +#' The package that contains the dictionary. +#' If `NULL`, the package of the component is used. +#' An `environment` can also be passed, which will then be used directly. +#' +#' @export +expect_mlr3component_subclass = function(compclass, constargs, check_congruent_man = TRUE, check_package_export = TRUE, + dict_package = NULL +) { + + checkmate::assert_list(constargs, names = "named") + checkmate::assert_flag(check_congruent_man) + checkmate::assert_flag(check_package_export) + checkmate::assert( + checkmate::check_string(dict_package, null.ok = TRUE), + checkmate::check_environment(dict_package, null.ok = TRUE) + ) + + old_options = options(mlr3.on_deprecated_mlr3component = "error") + on.exit(options(old_options)) + + checkmate::expect_class(compclass, "R6ClassGenerator") + + object = do.call(compclass$new, constargs) + + checkmate::expect_r6(object, "Mlr3Component", cloneable = TRUE, + public = c("id", "label", "param_set", "packages", "properties", "hash", "phash", + "format", "print", "help", "configure", "override_info", "man", "initialize" + ), + private = c(".additional_phash_input", "deep_clone", ".additional_configuration", ".dict_entry", ".representable") + ) + + + checkmate::expect_string(object$id, min.chars = 1L) + checkmate::expect_string(object$label, min.chars = 1L) + checkmate::expect_class(object$param_set, "ParamSet", null.ok = TRUE) + checkmate::expect_character(object$packages, any.missing = FALSE, min.chars = 1L) + checkmate::expect_character(object$properties, any.missing = FALSE, min.chars = 1L) + checkmate::expect_string(object$hash, min.chars = 1L) + checkmate::expect_string(object$phash, min.chars = 1L) + checkmate::expect_string(object$man, pattern = "^[^:]+::[^:]+$") + + checkmate::expect_flag(get_private(object)$.has_id) + checkmate::expect_string(get_private(object)$.dict_entry, min.chars = 1L) + checkmate::expect_string(get_private(object)$.dict_shortaccess, min.chars = 1L) + checkmate::expect_character(get_private(object)$.additional_configuration, + any.missing = FALSE, min.chars = 1L, unique = TRUE) + checkmate::expect_flag(get_private(object)$.representable) + + dict_entry = get_private(object)$.dict_entry + dict_shortaccess = get_private(object)$.dict_shortaccess + additional_configuration = get_private(object)$.additional_configuration + + all_fields_list = list() + recurse_class = compclass + while (!is.null(recurse_class)) { + all_fields_list[[length(all_fields_list) + 1]] = c(names(recurse_class$public_fields), names(recurse_class$active)) + recurse_class = recurse_class$get_inherit() + } + all_fields = unique(unlist(all_fields_list)) + # the following fields are part of the base class and can therefore not be part of the additional configuration + all_fields = setdiff(all_fields, c("man", "properties", "packages", "hash", "phash", "id", "label", "param_set")) + checkmate::expect_subset(additional_configuration, all_fields, + info = "additional_configuration is a subset of all valid fields") + checkmate::expect_subset(names(formals(object$initialize)), all_fields, + info = "construction arguments are accessible as fields") + checkmate::expect_disjunct(names(formals(object$initialize)), additional_configuration, + info = "construction arguments and additional_configuration should be disjoint") + if (!is.null(object$param_set)) { + checkmate::expect_disjunct(names(formals(object$initialize)), object$param_set$ids(), + info = "construction arguments and param_set IDs should be disjoint") + checkmate::expect_disjunct(additional_configuration, object$param_set$ids(), + info = "additional_configuration and param_set IDs should be disjoint") + } + + testthat::expect_output(print(object), object$id, fixed = TRUE, info = "print output contains id") + + checkmate::expect_string(object$format(), pattern = "^<[^>]+>$") + + testthat::expect_true(isTRUE(get_private(object)$.has_id) || identical(dict_entry, object$id)) + testthat::expect_true(is.null(object$param_set) || is.null(object$param_set$values)) + + oldhash = object$hash + oldphash = object$phash + + if (isTRUE(get_private(object)$.has_id)) { + oldid = object$id + object$id = "newid" + testthat::expect_equal(object$id, "newid", info = "id can be set") + testthat::expect_true(object$hash != oldhash, info = "hash changes with id") + testthat::expect_true(object$phash != oldphash, info = "phash changes with id") + object$id = oldid + testthat::expect_equal(object$id, oldid, info = "id can be reset") + testthat::expect_equal(object$hash, oldhash, info = "hash is reset by id reset") + testthat::expect_equal(object$phash, oldphash, info = "phash is reset by id reset") + object$configure(id = "newid2") + testthat::expect_equal(object$id, "newid2", info = "id can be set via configure") + object$configure(id = oldid) + testthat::expect_equal(object$id, oldid, info = "id can be reset via configure") + + } + + object2 = do.call(compclass$new, constargs) + object_clone = object$clone(deep = TRUE) + + expect_deep_clone(object, object2) + expect_deep_clone(object, object_clone) + + eligibleparams = NULL + + + + if (!is.null(object$param_set) || length(object$param_set$ids()) > 0L) { + # we now check if configuration parameters can be changed directly and through configure and whether that affects + # hash but not phash. We do this by automatically generating a parameter value that deviates from the automatically + # constructed one. However, for ParamUty we can't do that, so if there are only 'ParamUty' parameters we skip this + # part. + tops = object$param_set + eligibleparams = which( + tops$class != "ParamUty" & + # filter out discrete params with only one level, or the numeric parameters with $lower == $upper + # Note that numeric parameters have 0 levels, and discrete parameters have $lower == $upper (== NA) + ( + (!is.na(tops$lower) & tops$lower != tops$upper) | + (is.finite(tops$nlevels) & tops$nlevels > 1) + ) + ) + } + if (length(eligibleparams)) { + testingparam = tops$ids()[[eligibleparams[[1]]]] + origval = object$param_set$values[[testingparam]] + if (!is.atomic(origval)) origval = NULL + if (tops$class[[testingparam]] %in% c("ParamLgl", "ParamFct")) { + candidates = tops$levels[[testingparam]] + } else { + candidates = Filter(function(x) is.finite(x) && !is.na(x), + c(tops$lower[[testingparam]], tops$upper[[testingparam]], tops$lower[[testingparam]] + 1, 0, origval + 1)) + } + val = setdiff(candidates, origval)[1] + parvals = list(val) + names(parvals) = testingparam + + parvals_orig = list(origval) + names(parvals_orig) = testingparam + + object$param_set$values[[testingparam]] = val + testthat::expect_equal(object$param_set$values[[testingparam]], val, info = "parameters can be set directly") + changedhash = object$hash + testthat::expect_false(object$hash == oldhash, info = "hash changes with parameter set") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set") + testthat::expect_equal(object_clone$param_set$values[[testingparam]], origval, + info = "params of cloned objects are distinct") + object_changed_params = object$clone(deep = TRUE) + + object$param_set$values[[testingparam]] = origval + testthat::expect_equal(object$param_set$values[[testingparam]], origval, info = "parameters can be reset directly") + testthat::expect_equal(object$hash, oldhash, info = "hash is changed back by resetting parameter") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set reset") + + object$configure(.values = parvals) + testthat::expect_equal(object$param_set$values[[testingparam]], val, + info = "configure can set parameters via .values") + testthat::expect_equal(object$hash, changedhash, info = "hash changes with parameter set through configure") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set through configure") + testthat::expect_equal(object_clone$param_set$values[[testingparam]], origval, + info = "params of cloned objects are distinct") + object$configure(.values = parvals_orig) + testthat::expect_equal(object$param_set$values[[testingparam]], origval, + info = "configure can reset parameters via .values") + testthat::expect_equal(object$hash, oldhash, + info = "hash is unaffected by parameter set reset through configure") + testthat::expect_equal(object$phash, oldphash, + info = "phash is unaffected by parameter set reset through configure") + + + do.call(object$configure, parvals) + testthat::expect_equal(object$param_set$values[[testingparam]], val, info = "configure can set parameters via ...") + testthat::expect_equal(object$hash, changedhash, info = "hash changes with parameter set through configure") + testthat::expect_equal(object$phash, oldphash, info = "phash is unaffected by parameter set through configure") + testthat::expect_equal(object_clone$param_set$values[[testingparam]], origval, + info = "params of cloned objects are distinct") + do.call(object$configure, parvals_orig) + testthat::expect_equal(object$param_set$values[[testingparam]], origval, + info = "configure can reset parameters via ...") + testthat::expect_equal(object$hash, oldhash, + info = "hash is unaffected by parameter set reset through configure") + testthat::expect_equal(object$phash, oldphash, + info = "phash is unaffected by parameter set reset through configure") + + } + + if (is.null(dict_package)) { + dict_environment = topenv(object$.__enclos_env__) + } else if (is.environment(dict_package)) { + dict_environment = dict_package + } else { + dict_environment = asNamespace(dict_package) + } + + dict_constructor = get(dict_shortaccess, mode = "function", envir = dict_environment) + # dict_constructor is something like 'lrn', 'po', 'flt' etc. + checkmate::expect_function(dict_constructor) + + dictionary = dict_constructor() + checkmate::expect_r6(dictionary, "Dictionary") # expect an mlr3misc::Dictionary here + name_of_dictionary = Filter(function(x) identical(dict_environment[[x]], dictionary), names(dict_environment))[1] + testthat::expect_string(name_of_dictionary, min.chars = 1L, info = "name of dictionary in dict_environment") + + + dict_object = do.call(function(...) dict_constructor(dict_entry, ...), constargs) + testthat::expect_equal(dict_object, object, info = "object from dictionary is congruent with object") + if (length(eligibleparams)) { + dict_object2 = do.call(dict_constructor, c(list(dict_entry), insert_named(constargs, parvals))) + testthat::expect_equal(dict_object2, object_changed_params, + info = "object from dictionary constructed with changed parameters") + + dict_object3 = do.call(dict_constructor, c(list(dict_entry), insert_named(constargs, parvals_orig))) + testthat::expect_equal(dict_object3, object, info = "object from dictionary constructed with original parameter") + } + + if (check_congruent_man || check_package_export) { + expected_package = topenv(object$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]] + } + + if (check_congruent_man) { + testthat::expect_equal(object$man, + sprintf("%s::%s_%s", expected_package, name_of_dictionary, dict_entry), + info = "man is congruent with dict_name and dict_entry") + } + + if (check_package_export) { + nspath = dirname(system.file("NAMESPACE", package = expected_package)) + exports = parseNamespaceFile(basename(nspath), dirname(nspath))$exports + testthat::expect_true(class(object)[[1]] %in% exports, info = "class is exported") + } + + construction_conf_objects = object[names(formals(object$initialize))] + additional_conf_objects = object[additional_configuration] + + # constructing and configuring with original configuration values should not have an effect + + object_explicit_construction = do.call(compclass$new, construction_conf_objects) + testthat::expect_equal(object_explicit_construction, object, + info = "construction with arguments retrieved from formals of initialize is equivalent to construction without" + ) + + object3 = do.call(compclass$new, constargs) + + object3$configure(.values = additional_conf_objects) + testthat::expect_equal(object3, object, info = "configure with additional configuration does not change object") + + dict_object4 = do.call(dict_constructor, c(list(dict_entry), construction_conf_objects, additional_conf_objects)) + testthat::expect_equal(dict_object4, object, + info = "object can be constructed from initialize-formals and additional_configuration") + + if (!is.null(object$param_set)) { + dict_object5 = do.call(dict_constructor, + c(list(dict_entry), construction_conf_objects, additional_conf_objects, object$param_set$values)) + testthat::expect_equal(dict_object5, object, + info = "object can be constructed from initialize-formals, additional_configuration, and parameter set") + } +} + +#' @title Test many Mlr3Component subclasses +#' +#' @description +#' For a given list of Mlr3Component subclasses, run [expect_mlr3component_subclass()] for each of them. +#' +#' This function calls [testthat::test_that()], so it should *not* be run inside a `test_that()`-block. +#' Instead, it should be run at top level in a test file directly. +#' +#' @param compclasses (`list` of `R6ClassGenerator`) +#' The list of Mlr3Component subclasses to test. +#' It is a good idea to auto-generate this, see examples. +#' @param dict_constargs (`list`) +#' A list of lists of construction arguments to pass to the dictionary. +#' This list should be named by the class of the component for which they apply, but does not need to be exhaustive. +#' Components not mentioned in the list get empty construction arguments by default. +#' @param check_congruent_man (`logical(1)`) +#' Whether to check that the `man` field is congruent with the dictionary name and entry. +#' @param check_package_export (`logical(1)`) +#' Whether to check that the component is exported from the package. +#' @param dict_package (`character(1)` | `environment`) +#' The package that contains the dictionary. +#' If `NULL`, the package of the component is used. +#' An `environment` can also be passed, which will then be used directly. +#' +#' @examples +#' abstracts = c("PipeOp", "PipeOpEnsemble") +#' nspath = dirname(system.file("NAMESPACE", package = "mlr3pipelines")) +#' exports = parseNamespaceFile(basename(nspath), dirname(nspath))$exports +#' compclass_names = grep(exports, pattern = "^PipeOp", value = TRUE) +#' compclasses = lapply(compclass_names, get, envir = asNamespace("mlr3pipelines")) +#' dict_constargs = list( +#' PipeOpImputeLearner = list(learner = mlr_learners$get("classif.rpart")), +#' PipeOpLearner = list(learner = mlr_learners$get("classif.rpart")), +#' PipeOpLearnerCV = list(learner = mlr_learners$get("classif.rpart")), +#' ) +#' test_that_mlr3component_dict(compclasses, dict_constargs, dict_package = "mlr3pipelines") +#' @export + +test_that_mlr3component_dict = function(compclasses, dict_constargs, check_congruent_man = TRUE, + check_package_export = TRUE, dict_package = NULL +) { + for (compclass in compclasses) { + clname = compclass$classname + if (is.null(clname)) { + clname = "ERROR" # we don't want top level code to fail. + } + testthat::test_that(sprintf("Mlr3Component subclass %s", clname), { + expect_mlr3component_subclass( + compclass = compclass, + constargs = dict_constargs[[clname]], + check_congruent_man = check_congruent_man, + check_package_export = check_package_export, + dict_package = dict_package + ) + }) + } +} + + +#' @title Expect that 'one' is a deep clone of 'two' +#' +#' @description +#' Expect that 'one' is a deep clone of 'two'. +#' +#' @param one (`any`) +#' The first object to compare. +#' @param two (`any`) +#' The second object to compare. +#' +#' @export +expect_deep_clone = function(one, two) { + # is equal + testthat::expect_equal(one, two) + visited = new.env() # nolint + visited_b = new.env() # nolint + expect_references_differ = function(a, b, path) { + + force(path) + if (length(path) > 400) { + stop("Recursion too deep in expect_deep_clone()") + } + + # don't go in circles + addr_a = data.table::address(a) + addr_b = data.table::address(b) + if (!is.null(visited[[addr_a]])) { + return(invisible(NULL)) + } + visited[[addr_a]] = path + visited_b[[addr_b]] = path + + # follow attributes, even for non-recursive objects + if (utils::tail(path, 1) != "[attributes]" && !is.null(base::attributes(a))) { + testthat::expect_references_differ(base::attributes(a), base::attributes(b), c(path, "[attributes]")) + } + + # don't recurse if there is nowhere to go + if (!base::is.recursive(a)) { + return(invisible(NULL)) + } + + # check that environments differ + if (base::is.environment(a)) { + # some special environments + if (identical(a, baseenv()) || identical(a, globalenv()) || identical(a, emptyenv())) { + return(invisible(NULL)) + } + if (length(path) > 1 && R6::is.R6(a) && "clone" %nin% names(a)) { + return(invisible(NULL)) # don't check if smth is not cloneable + } + label = sprintf("Object addresses differ at path %s", paste0(path, collapse = "->")) + testthat::expect_true(addr_a != addr_b, label = label) + testthat::expect_null(visited_b[[addr_a]], label = label) + } else { + a <- unclass(a) + b <- unclass(b) + } + + # recurse + if (base::is.function(a)) { + return(invisible(NULL)) + ## # maybe this is overdoing it + ## expect_references_differ(base::formals(a), base::formals(b), c(path, "[function args]")) + ## expect_references_differ(base::body(a), base::body(b), c(path, "[function body]")) + } + objnames = base::names(a) + if (is.null(objnames) || anyDuplicated(objnames)) { + index = seq_len(base::length(a)) + } else { + index = objnames + if (base::is.environment(a)) { + index = Filter(function(x) !bindingIsActive(x, a), index) + } + } + for (i in index) { + if (utils::tail(path, 1) == "[attributes]" && i %in% c("srcref", "srcfile", ".Environment")) next + expect_references_differ(base::`[[`(a, i), base::`[[`(b, i), c(path, sprintf("[element %s]%s", i, + if (!is.null(objnames)) sprintf(" '%s'", if (is.character(index)) i else objnames[[i]]) else ""))) + } + } + expect_references_differ(one, two, "ROOT") +} From ec908ec25197fddcd16dcf15eb38a44ef7f6e526 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 13:26:20 +0200 Subject: [PATCH 17/22] documentation and tests --- DESCRIPTION | 3 +- R/Mlr3Component.R | 73 +++++++++---- inst/testthat/helper_test_mlr3component.R | 15 ++- man/Mlr3Component.Rd | 48 ++++++--- tests/testthat/helper.R | 4 + tests/testthat/test_Mlr3Component.R | 120 ++++++++++++++++++++++ 6 files changed, 216 insertions(+), 47 deletions(-) create mode 100644 tests/testthat/test_Mlr3Component.R diff --git a/DESCRIPTION b/DESCRIPTION index 0f32c62d..1d0a9727 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Suggests: evaluate, mirai, paradox, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + utils Config/testthat/edition: 3 Config/testthat/parallel: true Encoding: UTF-8 diff --git a/R/Mlr3Component.R b/R/Mlr3Component.R index 3c878051..a1464c3d 100644 --- a/R/Mlr3Component.R +++ b/R/Mlr3Component.R @@ -60,32 +60,42 @@ Mlr3Component = R6Class("Mlr3Component", #' `get(dict_shortaccess, mode = "function")(dict_entry)` should create an object of the concrete class. #' @param id (`character(1)`) #' The ID of the constructed object. - #' ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in combined [paradox::ParamSet]s. + #' ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in + #' combined [paradox::ParamSet]s. #' If instances of a given abstract subclass should all not have IDs, this should be set to `NULL`. - #' Should default to the value of `dict_entry` in most cases, with few exceptions for wrapper objects (e.g. a [mlr3pipelines::PipeOp] wrapping a [mlr3::Learner]). + #' Should default to the value of `dict_entry` in most cases, with few exceptions for wrapper objects (e.g. a + #' [mlr3pipelines::PipeOp] wrapping a [mlr3::Learner]). #' @param param_set ([paradox::ParamSet] | `list` | `NULL`) #' Parameter space description. This should be created by the subclass and given to `super$initialize()`. #' If this is a [`ParamSet`][paradox::ParamSet], it is used for `$param_set` directly. - #' Otherwise it must be a `list` of expressions e.g. created by `alist()` that evaluate to [`ParamSet`][paradox::ParamSet]s. - #' These [`ParamSet`][paradox::ParamSet] are combined using a [`ParamSetCollection`][paradox::ParamSetCollection].\cr - #' If instances of a given abstract subclass should all not have a [paradox::ParamSet], this should be set to `NULL`. - #' Otherwise, if a concrete subclass just happens to have an empty search space, the default [paradox::ps()] should be used. + #' Otherwise it must be a `list` of expressions e.g. created by `alist()` that evaluate to + #' [`ParamSet`][paradox::ParamSet]s. + #' These [`ParamSet`][paradox::ParamSet] are combined using a + #' [`ParamSetCollection`][paradox::ParamSetCollection].\cr + #' If instances of a given abstract subclass should all not have a [paradox::ParamSet], this should be set to + #' `NULL`. + #' Otherwise, if a concrete subclass just happens to have an empty search space, the default [paradox::ps()] + #' should be used. #' @param packages (`character()`) #' The packages required by the constructed object. #' The constructor will check whether these packages can be loaded and give a warning otherwise. - #' The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be provided here. + #' The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be + #' provided here. #' Elements of `packages` are deduplicated and made accessible as the `$packages` field. #' @param properties (`character()`) #' A set of properties/capabilities the object has. #' These often need to be a subset of an entry in [mlr3::mlr_reflections]. - #' However, the [`Mlr3Component`] constructor does not check this, it needs to be asserted by an abstract inheriting class. + #' However, the [`Mlr3Component`] constructor does not check this, it needs to be asserted by an abstract + #' inheriting class. #' Elements are deduplicated and made accessible as the `$properties` field. #' @param additional_configuration (`character()`) - #' Names of class fields that constitute additional configuration settings that influence the behavior of the component, but are neither construction argument, nor part of the [paradox::ParamSet]. + #' Names of class fields that constitute additional configuration settings that influence the behavior of the + #' component, but are neither construction argument, nor part of the [paradox::ParamSet]. #' An example is the `$predict_type` field of a [mlr3::Learner]. #' @param representable (`logical(1)`) #' Whether the object can be represented as a simple string. - #' Should generally be `TRUE` except for objects that are constructed with a large amount of data, such as [mlr3::Task]s. + #' Should generally be `TRUE` except for objects that are constructed with a large amount of data, such as + #' [mlr3::Task]s. initialize = function(dict_entry, dict_shortaccess, id = dict_entry, param_set = ps(), packages = character(0), properties = character(0), additional_configuration = character(0), @@ -93,6 +103,7 @@ Mlr3Component = R6Class("Mlr3Component", ) { private$.dict_entry = assert_string(dict_entry) private$.dict_shortaccess = assert_string(dict_shortaccess) + private$.representable = assert_flag(representable) private$.has_id = !is.null(id) if (private$.has_id) { self$id = id @@ -138,7 +149,8 @@ Mlr3Component = R6Class("Mlr3Component", # (1) a parameter, (2) a construction argument (these are captured automatically), (3) a standard field if (!is.null(self$param_set)) self$param_set$ids(), names(formals(self$initialize)), - "id", "label", "param_set", "packages", "properties", "format", "print", "help", "configure", "override_info", "man", "hash", "phash" + "id", "label", "param_set", "packages", "properties", "format", "print", "help", "configure", "override_info", + "man", "hash", "phash" )) private$.additional_configuration = additional_configuration }, @@ -161,7 +173,7 @@ Mlr3Component = R6Class("Mlr3Component", msg_h = if (is.null(self$label) || is.na(self$label)) "" else paste0(": ", self$label) cat_cli({ cli::cli_h1("{.cls {class(self)[1L]}} ({self$id}){msg_h}") - cli::cli_li("Parameters: {as_short_string(if (is.null(private$.param_set)) list() else private$.param_set$values, 1000L)}") + cli::cli_li("Parameters: {as_short_string(if (is.null(private$.param_set)) list() else private$.param_set$values, 1000L)}") # nolint cli::cli_li("Packages: {.pkg {if (length(self$packages)) self$packages else '-'}}") }) }, @@ -230,6 +242,7 @@ Mlr3Component = R6Class("Mlr3Component", orig_hash = self$hash private$.hashmap = structure(c(hash, hash), names = c(orig_hash, orig_phash)) } + invisible(self) } ), @@ -283,7 +296,8 @@ Mlr3Component = R6Class("Mlr3Component", #' @field properties (`character()`)\cr #' Stores a set of properties/capabilities the object has. #' These are set during construction and should not be changed afterwards. - #' They may be "optimistic" in the sense that the true capabilities could depend on specific configuration parameter settings; + #' They may be "optimistic" in the sense that the true capabilities could depend on specific configuration + #' parameter settings; #' `$properties` then indicate the capabilities under favorable configuration settings. properties = function(rhs) { if (!missing(rhs)) { @@ -314,7 +328,8 @@ Mlr3Component = R6Class("Mlr3Component", #' @field man (`character(1)`) #' String in the format `[pkg]::[class name]` pointing to a manual page for this object. #' Inferred automatically from the class name and package in which the class is defined. - #' If a concrete class is not defined in a package, the help page of its first parent class with a help page is used. + #' If a concrete class is not defined in a package, the help page of its first parent class with a help page is + #' used. #' Can be overridden with the `$override_info()` method. man = function(rhs) { if (!missing(rhs)) { @@ -325,10 +340,14 @@ Mlr3Component = R6Class("Mlr3Component", if (is.null(private$.man)) { iter = 1 env = self - repeat { - man = paste0(topenv(env$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]], "::", class(self)[[iter]]) + while (!is.null(env)) { + pkgstring = topenv(env$.__enclos_env__)$.__NAMESPACE__.$spec[["name"]] + classstring = class(self)[[iter]] + man = paste0(pkgstring, "::", classstring) help_works = tryCatch({ - length(as.character(open_help(man))) > 0L + length(as.character(open_help(man))) > 0L || + NROW(utils::help.search(sprintf("^%s$", classstring), package = pkgstring, + ignore.case = FALSE, agrep = FALSE, fields = "alias")$matches) > 0L }, error = function(e) FALSE) if (help_works) { private$.man = man @@ -337,13 +356,18 @@ Mlr3Component = R6Class("Mlr3Component", iter = iter + 1 env = env$.__enclos_env__$super } + if (is.null(private$.man)) { + private$.man = "mlr3misc::Mlr3Component" + } } private$.man }, #' @field hash (`character(1)`) - #' Stable hash that includes id, parameter values (if present) and additional configuration settings (from construction or class fields) but not state. - #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must therefore be implemented by subclasses. + #' Stable hash that includes id, parameter values (if present) and additional configuration settings (from + #' construction or class fields) but not state. + #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must + #' therefore be implemented by subclasses. hash = function(rhs) { if (!missing(rhs)) stop("hash is read-only") hash = calculate_hash(class(self), self$id, .list = c(self$param_set$values, private$.additional_phash_input())) @@ -355,8 +379,10 @@ Mlr3Component = R6Class("Mlr3Component", }, #' @field phash (`character(1)`) - #' Hash that includes id and additional configuration settings (from construction or class fields) but not parameter values and no state. - #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must therefore be implemented by subclasses. + #' Hash that includes id and additional configuration settings (from construction or class fields) but not + #' parameter values and no state. + #' Makes use of the `private$.additional_phash_input()` function to collect additional information, which must + #' therefore be implemented by subclasses. phash = function(rhs) { if (!missing(rhs)) stop("phash is read-only") hash = calculate_hash( @@ -374,6 +400,7 @@ Mlr3Component = R6Class("Mlr3Component", private = list( .dict_entry = NULL, .dict_shortaccess = NULL, + .representable = NULL, .has_id = NULL, .id = NULL, .param_set = NULL, @@ -392,9 +419,11 @@ Mlr3Component = R6Class("Mlr3Component", if (length(sc) > 1 && identical(sc[[length(sc) - 1]][[1]], quote(super$.additional_phash_input))) return(NULL) initformals <- names(formals(args(self$initialize))) if (!test_subset(initformals, c("id", "param_vals"))) { + # nolint start stopf("Class %s has construction arguments besides 'id' and 'param_vals' but does not overload the private '.additional_phash_input()' function. -The hash and phash of a class must differ when it represents a different operation; since %s has construction arguments that could change the operation that is performed by it, it is necessary for the $hash and $phash to reflect this. `.additional_phash_input()` should return all the information (e.g. hashes of encapsulated items) that should additionally be hashed; read the help of ?Mlr3Component for more information.", # nolint +The hash and phash of a class must differ when it represents a different operation; since %s has construction arguments that could change the operation that is performed by it, it is necessary for the $hash and $phash to reflect this. `.additional_phash_input()` should return all the information (e.g. hashes of encapsulated items) that should additionally be hashed; read the help of ?Mlr3Component for more information.", + # nolint end class(self)[[1]], class(self)[[1]]) } }, diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R index 06b9b137..68e4afa3 100644 --- a/inst/testthat/helper_test_mlr3component.R +++ b/inst/testthat/helper_test_mlr3component.R @@ -95,7 +95,6 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m checkmate::expect_string(object$format(), pattern = "^<[^>]+>$") testthat::expect_true(isTRUE(get_private(object)$.has_id) || identical(dict_entry, object$id)) - testthat::expect_true(is.null(object$param_set) || is.null(object$param_set$values)) oldhash = object$hash oldphash = object$phash @@ -221,7 +220,7 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m dictionary = dict_constructor() checkmate::expect_r6(dictionary, "Dictionary") # expect an mlr3misc::Dictionary here name_of_dictionary = Filter(function(x) identical(dict_environment[[x]], dictionary), names(dict_environment))[1] - testthat::expect_string(name_of_dictionary, min.chars = 1L, info = "name of dictionary in dict_environment") + checkmate::expect_string(name_of_dictionary, min.chars = 1L, info = "name of dictionary in dict_environment") dict_object = do.call(function(...) dict_constructor(dict_entry, ...), constargs) @@ -251,8 +250,8 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m testthat::expect_true(class(object)[[1]] %in% exports, info = "class is exported") } - construction_conf_objects = object[names(formals(object$initialize))] - additional_conf_objects = object[additional_configuration] + construction_conf_objects = mget(names(formals(object$initialize)), envir = object) + additional_conf_objects = mget(additional_configuration, envir = object) # constructing and configuring with original configuration values should not have an effect @@ -371,7 +370,7 @@ expect_deep_clone = function(one, two) { # follow attributes, even for non-recursive objects if (utils::tail(path, 1) != "[attributes]" && !is.null(base::attributes(a))) { - testthat::expect_references_differ(base::attributes(a), base::attributes(b), c(path, "[attributes]")) + Recall(base::attributes(a), base::attributes(b), c(path, "[attributes]")) } # don't recurse if there is nowhere to go @@ -400,8 +399,8 @@ expect_deep_clone = function(one, two) { if (base::is.function(a)) { return(invisible(NULL)) ## # maybe this is overdoing it - ## expect_references_differ(base::formals(a), base::formals(b), c(path, "[function args]")) - ## expect_references_differ(base::body(a), base::body(b), c(path, "[function body]")) + ## Recall(base::formals(a), base::formals(b), c(path, "[function args]")) + ## Recall(base::body(a), base::body(b), c(path, "[function body]")) } objnames = base::names(a) if (is.null(objnames) || anyDuplicated(objnames)) { @@ -414,7 +413,7 @@ expect_deep_clone = function(one, two) { } for (i in index) { if (utils::tail(path, 1) == "[attributes]" && i %in% c("srcref", "srcfile", ".Environment")) next - expect_references_differ(base::`[[`(a, i), base::`[[`(b, i), c(path, sprintf("[element %s]%s", i, + Recall(base::`[[`(a, i), base::`[[`(b, i), c(path, sprintf("[element %s]%s", i, if (!is.null(objnames)) sprintf(" '%s'", if (is.character(index)) i else objnames[[i]]) else ""))) } } diff --git a/man/Mlr3Component.Rd b/man/Mlr3Component.Rd index 99584bb8..54274423 100644 --- a/man/Mlr3Component.Rd +++ b/man/Mlr3Component.Rd @@ -75,7 +75,8 @@ Absence of these packages will generate a warning during construction.} \item{\code{properties}}{(\code{character()})\cr Stores a set of properties/capabilities the object has. These are set during construction and should not be changed afterwards. -They may be "optimistic" in the sense that the true capabilities could depend on specific configuration parameter settings; +They may be "optimistic" in the sense that the true capabilities could depend on specific configuration +parameter settings; \verb{$properties} then indicate the capabilities under favorable configuration settings.} \item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{NULL}) @@ -84,16 +85,21 @@ Set of hyperparameters.} \item{\code{man}}{(\code{character(1)}) String in the format \verb{[pkg]::[class name]} pointing to a manual page for this object. Inferred automatically from the class name and package in which the class is defined. -If a concrete class is not defined in a package, the help page of its first parent class with a help page is used. +If a concrete class is not defined in a package, the help page of its first parent class with a help page is +used. Can be overridden with the \verb{$override_info()} method.} \item{\code{hash}}{(\code{character(1)}) -Stable hash that includes id, parameter values (if present) and additional configuration settings (from construction or class fields) but not state. -Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must therefore be implemented by subclasses.} +Stable hash that includes id, parameter values (if present) and additional configuration settings (from +construction or class fields) but not state. +Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must +therefore be implemented by subclasses.} \item{\code{phash}}{(\code{character(1)}) -Hash that includes id and additional configuration settings (from construction or class fields) but not parameter values and no state. -Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must therefore be implemented by subclasses.} +Hash that includes id and additional configuration settings (from construction or class fields) but not +parameter values and no state. +Makes use of the \code{private$.additional_phash_input()} function to collect additional information, which must +therefore be implemented by subclasses.} } \if{html}{\out{}} } @@ -139,37 +145,47 @@ Name of the short access function for the dictionary that this component can be \item{\code{id}}{(\code{character(1)}) The ID of the constructed object. -ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in combined \link[paradox:ParamSet]{paradox::ParamSet}s. +ID field can be used to identify objects in tables or plots, and sometimes to prefix parameter names in +combined \link[paradox:ParamSet]{paradox::ParamSet}s. If instances of a given abstract subclass should all not have IDs, this should be set to \code{NULL}. -Should default to the value of \code{dict_entry} in most cases, with few exceptions for wrapper objects (e.g. a \link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp} wrapping a \link[mlr3:Learner]{mlr3::Learner}).} +Should default to the value of \code{dict_entry} in most cases, with few exceptions for wrapper objects (e.g. a +\link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp} wrapping a \link[mlr3:Learner]{mlr3::Learner}).} \item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet} | \code{list} | \code{NULL}) Parameter space description. This should be created by the subclass and given to \code{super$initialize()}. If this is a \code{\link[paradox:ParamSet]{ParamSet}}, it is used for \verb{$param_set} directly. -Otherwise it must be a \code{list} of expressions e.g. created by \code{alist()} that evaluate to \code{\link[paradox:ParamSet]{ParamSet}}s. -These \code{\link[paradox:ParamSet]{ParamSet}} are combined using a \code{\link[paradox:ParamSetCollection]{ParamSetCollection}}.\cr -If instances of a given abstract subclass should all not have a \link[paradox:ParamSet]{paradox::ParamSet}, this should be set to \code{NULL}. -Otherwise, if a concrete subclass just happens to have an empty search space, the default \code{\link[paradox:ps]{paradox::ps()}} should be used.} +Otherwise it must be a \code{list} of expressions e.g. created by \code{alist()} that evaluate to +\code{\link[paradox:ParamSet]{ParamSet}}s. +These \code{\link[paradox:ParamSet]{ParamSet}} are combined using a +\code{\link[paradox:ParamSetCollection]{ParamSetCollection}}.\cr +If instances of a given abstract subclass should all not have a \link[paradox:ParamSet]{paradox::ParamSet}, this should be set to +\code{NULL}. +Otherwise, if a concrete subclass just happens to have an empty search space, the default \code{\link[paradox:ps]{paradox::ps()}} +should be used.} \item{\code{packages}}{(\code{character()}) The packages required by the constructed object. The constructor will check whether these packages can be loaded and give a warning otherwise. -The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be provided here. +The packages of the R6 objects in the inheritance hierarchy are automatically added and do not need to be +provided here. Elements of \code{packages} are deduplicated and made accessible as the \verb{$packages} field.} \item{\code{properties}}{(\code{character()}) A set of properties/capabilities the object has. These often need to be a subset of an entry in \link[mlr3:mlr_reflections]{mlr3::mlr_reflections}. -However, the \code{\link{Mlr3Component}} constructor does not check this, it needs to be asserted by an abstract inheriting class. +However, the \code{\link{Mlr3Component}} constructor does not check this, it needs to be asserted by an abstract +inheriting class. Elements are deduplicated and made accessible as the \verb{$properties} field.} \item{\code{additional_configuration}}{(\code{character()}) -Names of class fields that constitute additional configuration settings that influence the behavior of the component, but are neither construction argument, nor part of the \link[paradox:ParamSet]{paradox::ParamSet}. +Names of class fields that constitute additional configuration settings that influence the behavior of the +component, but are neither construction argument, nor part of the \link[paradox:ParamSet]{paradox::ParamSet}. An example is the \verb{$predict_type} field of a \link[mlr3:Learner]{mlr3::Learner}.} \item{\code{representable}}{(\code{logical(1)}) Whether the object can be represented as a simple string. -Should generally be \code{TRUE} except for objects that are constructed with a large amount of data, such as \link[mlr3:Task]{mlr3::Task}s.} +Should generally be \code{TRUE} except for objects that are constructed with a large amount of data, such as +\link[mlr3:Task]{mlr3::Task}s.} } \if{html}{\out{}} } diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 36eadc66..95b5e581 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -9,3 +9,7 @@ expect_man_exists = function(man) { checkmate::expect_data_frame(matches$matches, min.rows = 1L, info = "man page lookup") } } + +lapply(list.files(system.file("testthat", package = "mlr3misc"), pattern = "^helper.*\\.[rR]", full.names = TRUE), + source +) diff --git a/tests/testthat/test_Mlr3Component.R b/tests/testthat/test_Mlr3Component.R new file mode 100644 index 00000000..a9969fe2 --- /dev/null +++ b/tests/testthat/test_Mlr3Component.R @@ -0,0 +1,120 @@ +skip_if_not_installed("paradox") + +MiniBaseclass = R6Class("MiniBaseclass", inherit = Mlr3Component, + public = list( + initialize = function(id, param_set = ps(), properties = character(0), + packages = character(0), additional_configuration = character(0)) { + super$initialize(dict_entry = id, dict_shortaccess = "mini", + param_set = param_set, properties = properties, packages = packages, + additional_configuration = c("some_configurable", additional_configuration) + ) + private$.some_configurable = TRUE + } + ), + active = list( + some_configurable = function(value) { + if (!missing(value)) { + private$.some_configurable = value + } + private$.some_configurable + } + ), + private = list( + .some_configurable = NULL, + .additional_phash_input = function() { + list(private$.some_configurable, super$.additional_phash_input()) + } + ) +) + +mlr_mini = R6Class("DictionaryMini", inherit = Dictionary, cloneable = FALSE, +)$new() + +mini = function(.key, ...) { + dictionary_sugar_get(dict = mlr_mini, .key, ...) +} + +MiniConcrete = R6Class("MiniConcrete", inherit = MiniBaseclass, + public = list( + initialize = function(constarg) { + super$initialize(id = "concrete", + param_set = paradox::ps(x = paradox::p_dbl(0, 10, init = 2)), properties = "xyz", + packages = "data.table", # we know this one is present + additional_configuration = "some_other_configurable" + ) + private$.constarg = constarg + private$.some_other_configurable = FALSE + } + ), + active = list( + some_other_configurable = function(value) { + if (!missing(value)) { + private$.some_other_configurable = value + } + private$.some_other_configurable + }, + constarg = function(value) { + if (!missing(value)) { + private$.constarg = value + } + private$.constarg + } + ), + private = list( + .some_other_configurable = NULL, + .constarg = NULL, + .additional_phash_input = function() { + list(private$.some_other_configurable, super$.additional_phash_input()) + } + ) +) + +mlr_mini$add("concrete", MiniConcrete) + + +test_that("Mlr3Component basic tests", { + object = mini("concrete", constarg = "testvalue") + expect_equal(object$constarg, "testvalue") + expect_equal(object$some_other_configurable, FALSE) + expect_equal(object$some_configurable, TRUE) + expect_equal(object$id, "concrete") + expect_equal(object$param_set$values, list(x = 2)) + object$id = "newid" + expect_equal(object$id, "newid") + expect_string(object$hash) + expect_string(object$phash) + expect_equal(object$format(), "") + expect_output(object$print(), " \\(newid\\)") + + object$configure(x = 3, some_configurable = FALSE, id = "newerid") + expect_equal(object$some_configurable, FALSE) + expect_equal(object$id, "newerid") + expect_equal(object$param_set$values, list(x = 3)) + + # The man page is Mlr3Component, since MiniBaseclass and MiniConcrete don't have help pages + expect_equal(object$man, "mlr3misc::Mlr3Component") + expect_equal(object$packages, "data.table") + expect_equal(object$properties, "xyz") + + object$override_info(man = "mlr3misc::MiniConcrete", hash = "abcxyz") + expect_equal(object$man, "mlr3misc::MiniConcrete") + expect_equal(object$hash, "abcxyz") + expect_equal(object$phash, "abcxyz") + object$param_set$values$x = 4 + + expect_equal(object$phash, "abcxyz") + expect_true(object$hash != "abcxyz") + + object$param_set$values$x = 3 + expect_equal(object$hash, "abcxyz") + expect_equal(object$phash, "abcxyz") +}) + + +test_that_mlr3component_dict( + compclasses = list(MiniConcrete), + dict_constargs = list(MiniConcrete = list(constarg = "testconstargval")), + check_congruent_man = FALSE, # test-class does not have correct man page value + check_package_export = FALSE, # test-class is not exported + dict_package = environment() # dictionary is in this environment +) From da988a68cfa412e2a24e8fe65071987cbd2c5574 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 16:10:21 +0200 Subject: [PATCH 18/22] refining autotest --- inst/testthat/helper_test_mlr3component.R | 27 ++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R index 68e4afa3..9e33d419 100644 --- a/inst/testthat/helper_test_mlr3component.R +++ b/inst/testthat/helper_test_mlr3component.R @@ -67,6 +67,7 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m dict_entry = get_private(object)$.dict_entry dict_shortaccess = get_private(object)$.dict_shortaccess additional_configuration = get_private(object)$.additional_configuration + construction_arguments = as.character(names(formals(object$initialize))) all_fields_list = list() recurse_class = compclass @@ -79,12 +80,12 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m all_fields = setdiff(all_fields, c("man", "properties", "packages", "hash", "phash", "id", "label", "param_set")) checkmate::expect_subset(additional_configuration, all_fields, info = "additional_configuration is a subset of all valid fields") - checkmate::expect_subset(names(formals(object$initialize)), all_fields, + checkmate::expect_subset(construction_arguments, all_fields, info = "construction arguments are accessible as fields") - checkmate::expect_disjunct(names(formals(object$initialize)), additional_configuration, + checkmate::expect_disjunct(construction_arguments, additional_configuration, info = "construction arguments and additional_configuration should be disjoint") if (!is.null(object$param_set)) { - checkmate::expect_disjunct(names(formals(object$initialize)), object$param_set$ids(), + checkmate::expect_disjunct(construction_arguments, object$param_set$ids(), info = "construction arguments and param_set IDs should be disjoint") checkmate::expect_disjunct(additional_configuration, object$param_set$ids(), info = "additional_configuration and param_set IDs should be disjoint") @@ -139,7 +140,8 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m ( (!is.na(tops$lower) & tops$lower != tops$upper) | (is.finite(tops$nlevels) & tops$nlevels > 1) - ) + ) & + !map_lgl(tops$values[tops$ids()], is.null) ) } if (length(eligibleparams)) { @@ -239,9 +241,14 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m } if (check_congruent_man) { - testthat::expect_equal(object$man, - sprintf("%s::%s_%s", expected_package, name_of_dictionary, dict_entry), - info = "man is congruent with dict_name and dict_entry") + checkmate::expect_string(object$man, pattern = sprintf("^%s::[^:]+$", expected_package)) + help_info = strsplit(object$man, "::")[[1]] + help_topic = utils::help.search(sprintf("^%s$", help_info[[2]]), package = help_info[[1]], ignore.case = FALSE, + agrep = FALSE, fields = "alias")$matches$Topic + testthat::expect_true(length(help_topic) > 0L, info = "help page exists") + testthat::expect_equal(help_topic, + sprintf("%s_%s", name_of_dictionary, dict_entry), + info = "help page name is congruent with dict_name and dict_entry") } if (check_package_export) { @@ -250,7 +257,7 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m testthat::expect_true(class(object)[[1]] %in% exports, info = "class is exported") } - construction_conf_objects = mget(names(formals(object$initialize)), envir = object) + construction_conf_objects = mget(construction_arguments, envir = object) additional_conf_objects = mget(additional_configuration, envir = object) # constructing and configuring with original configuration values should not have an effect @@ -305,7 +312,7 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m #' abstracts = c("PipeOp", "PipeOpEnsemble") #' nspath = dirname(system.file("NAMESPACE", package = "mlr3pipelines")) #' exports = parseNamespaceFile(basename(nspath), dirname(nspath))$exports -#' compclass_names = grep(exports, pattern = "^PipeOp", value = TRUE) +#' compclass_names = setdiff(grep(exports, pattern = "^PipeOp", value = TRUE), abstract_classes) #' compclasses = lapply(compclass_names, get, envir = asNamespace("mlr3pipelines")) #' dict_constargs = list( #' PipeOpImputeLearner = list(learner = mlr_learners$get("classif.rpart")), @@ -326,7 +333,7 @@ test_that_mlr3component_dict = function(compclasses, dict_constargs, check_congr testthat::test_that(sprintf("Mlr3Component subclass %s", clname), { expect_mlr3component_subclass( compclass = compclass, - constargs = dict_constargs[[clname]], + constargs = c(list(), dict_constargs[[clname]]), check_congruent_man = check_congruent_man, check_package_export = check_package_export, dict_package = dict_package From b8760b4a5f9107b1188f721d6752d8eee749eae1 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 16:48:17 +0200 Subject: [PATCH 19/22] don't assume get_private() available in helper function --- inst/testthat/helper_test_mlr3component.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R index 9e33d419..06440cda 100644 --- a/inst/testthat/helper_test_mlr3component.R +++ b/inst/testthat/helper_test_mlr3component.R @@ -57,16 +57,16 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m checkmate::expect_string(object$phash, min.chars = 1L) checkmate::expect_string(object$man, pattern = "^[^:]+::[^:]+$") - checkmate::expect_flag(get_private(object)$.has_id) - checkmate::expect_string(get_private(object)$.dict_entry, min.chars = 1L) - checkmate::expect_string(get_private(object)$.dict_shortaccess, min.chars = 1L) - checkmate::expect_character(get_private(object)$.additional_configuration, + checkmate::expect_flag(mlr3misc::get_private(object)$.has_id) + checkmate::expect_string(mlr3misc::get_private(object)$.dict_entry, min.chars = 1L) + checkmate::expect_string(mlr3misc::get_private(object)$.dict_shortaccess, min.chars = 1L) + checkmate::expect_character(mlr3misc::get_private(object)$.additional_configuration, any.missing = FALSE, min.chars = 1L, unique = TRUE) - checkmate::expect_flag(get_private(object)$.representable) + checkmate::expect_flag(mlr3misc::get_private(object)$.representable) - dict_entry = get_private(object)$.dict_entry - dict_shortaccess = get_private(object)$.dict_shortaccess - additional_configuration = get_private(object)$.additional_configuration + dict_entry = mlr3misc::get_private(object)$.dict_entry + dict_shortaccess = mlr3misc::get_private(object)$.dict_shortaccess + additional_configuration = mlr3misc::get_private(object)$.additional_configuration construction_arguments = as.character(names(formals(object$initialize))) all_fields_list = list() @@ -95,12 +95,12 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m checkmate::expect_string(object$format(), pattern = "^<[^>]+>$") - testthat::expect_true(isTRUE(get_private(object)$.has_id) || identical(dict_entry, object$id)) + testthat::expect_true(isTRUE(mlr3misc::get_private(object)$.has_id) || identical(dict_entry, object$id)) oldhash = object$hash oldphash = object$phash - if (isTRUE(get_private(object)$.has_id)) { + if (isTRUE(mlr3misc::get_private(object)$.has_id)) { oldid = object$id object$id = "newid" testthat::expect_equal(object$id, "newid", info = "id can be set") From aeaa4110c7516f508040419cae4b799e940bc8c4 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 17:09:46 +0200 Subject: [PATCH 20/22] more stuff-not-exported problems --- inst/testthat/helper_test_mlr3component.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R index 06440cda..d64aa80e 100644 --- a/inst/testthat/helper_test_mlr3component.R +++ b/inst/testthat/helper_test_mlr3component.R @@ -391,7 +391,7 @@ expect_deep_clone = function(one, two) { if (identical(a, baseenv()) || identical(a, globalenv()) || identical(a, emptyenv())) { return(invisible(NULL)) } - if (length(path) > 1 && R6::is.R6(a) && "clone" %nin% names(a)) { + if (length(path) > 1 && R6::is.R6(a) && !"clone" %in% names(a)) { return(invisible(NULL)) # don't check if smth is not cloneable } label = sprintf("Object addresses differ at path %s", paste0(path, collapse = "->")) From 36b0de097e68daced48cfd01ff06fa8b6f1c1f96 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 17:21:06 +0200 Subject: [PATCH 21/22] more stuff-not-exported problems II --- inst/testthat/helper_test_mlr3component.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R index d64aa80e..d712b78b 100644 --- a/inst/testthat/helper_test_mlr3component.R +++ b/inst/testthat/helper_test_mlr3component.R @@ -141,7 +141,7 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m (!is.na(tops$lower) & tops$lower != tops$upper) | (is.finite(tops$nlevels) & tops$nlevels > 1) ) & - !map_lgl(tops$values[tops$ids()], is.null) + !mlr3misc::map_lgl(tops$values[tops$ids()], is.null) ) } if (length(eligibleparams)) { From fb5e16800c1abd9a39f04c062b0a7151a950f7b2 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 16 Aug 2025 17:30:14 +0200 Subject: [PATCH 22/22] more stuff-not-exported problems III --- inst/testthat/helper_test_mlr3component.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/testthat/helper_test_mlr3component.R b/inst/testthat/helper_test_mlr3component.R index d712b78b..7fb3e7cb 100644 --- a/inst/testthat/helper_test_mlr3component.R +++ b/inst/testthat/helper_test_mlr3component.R @@ -228,11 +228,11 @@ expect_mlr3component_subclass = function(compclass, constargs, check_congruent_m dict_object = do.call(function(...) dict_constructor(dict_entry, ...), constargs) testthat::expect_equal(dict_object, object, info = "object from dictionary is congruent with object") if (length(eligibleparams)) { - dict_object2 = do.call(dict_constructor, c(list(dict_entry), insert_named(constargs, parvals))) + dict_object2 = do.call(dict_constructor, c(list(dict_entry), mlr3misc::insert_named(constargs, parvals))) testthat::expect_equal(dict_object2, object_changed_params, info = "object from dictionary constructed with changed parameters") - dict_object3 = do.call(dict_constructor, c(list(dict_entry), insert_named(constargs, parvals_orig))) + dict_object3 = do.call(dict_constructor, c(list(dict_entry), mlr3misc::insert_named(constargs, parvals_orig))) testthat::expect_equal(dict_object3, object, info = "object from dictionary constructed with original parameter") }