diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 3ecda217..8ea29990 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -29,10 +29,10 @@ jobs: fail-fast: false matrix: r-version: ["oldrel", "release", "devel"] - depends-only: [true, false] + depends-only: [true, false] suggests-only: [true, false] env: - _R_CHECK_DEPENDS_ONLY_: ${{ matrix.depends-only }} + _R_CHECK_DEPENDS_ONLY_: ${{ matrix.depends-only }} _R_CHECK_SUGGESTS_ONLY_: ${{ matrix.suggests-only }} steps: diff --git a/.lintr b/.lintr index 4312a938..210dc9bf 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,6 @@ linters: linters_with_defaults(commented_code_linter = NULL, # camel_case_linter = NULL, # snake_case_linter, - spaces_left_parentheses_linter = NULL) + spaces_left_parentheses_linter = NULL, + indentation_linter(hanging_indent_style = "tidy")) +exclusions: list("vignettes/dimensionality-reduction.Rnw") diff --git a/DESCRIPTION b/DESCRIPTION index 97ac2a4c..2a629199 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: dimRed Title: A Framework for Dimensionality Reduction -Version: 0.2.6 +Version: 0.2.7 Authors@R: c( person("Guido", "Kraemer", email = "guido.kraemer@uni-leipzig.de", role = c("aut","cre"), comment = c(ORCID = "0000-0003-4865-5041") @@ -33,7 +33,6 @@ Suggests: keras, kernlab, knitr, - loe, optimx, pcaL1, pcaPP, @@ -55,12 +54,12 @@ BugReports: https://github.com/gdkrmr/dimRed/issues URL: https://www.guido-kraemer.com/software/dimred/ Encoding: UTF-8 Collate: - 'dimRedMethod-class.R' + 'autoencoder.R' 'misc.R' 'dimRedData-class.R' - 'dimRedResult-class.R' - 'autoencoder.R' 'dataSets.R' + 'dimRedMethod-class.R' + 'dimRedResult-class.R' 'diffmap.R' 'dimRed.R' 'drr.R' @@ -86,5 +85,5 @@ Collate: 'soe.R' 'tsne.R' 'umap.R' -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.2 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 07d9d984..7d5a8597 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export(AUC_lnK_R_NX) -export(AutoEncoder) export(DRR) export(DiffusionMaps) export(DrL) @@ -11,7 +10,6 @@ export(HLLE) export(Isomap) export(KamadaKawai) export(LCMC) -export(LaplacianEigenmaps) export(MDS) export(NNMF) export(PCA) @@ -47,7 +45,6 @@ export(reconstruction_error) export(reconstruction_rmse) export(tSNE) export(total_correlation) -exportClasses(AutoEncoder) exportClasses(DRR) exportClasses(DiffusionMaps) exportClasses(DrL) @@ -56,7 +53,6 @@ exportClasses(FruchtermanReingold) exportClasses(HLLE) exportClasses(Isomap) exportClasses(KamadaKawai) -exportClasses(LaplacianEigenmaps) exportClasses(MDS) exportClasses(NNMF) exportClasses(PCA) diff --git a/R/autoencoder.R b/R/autoencoder.R index d7b45728..583f18f2 100644 --- a/R/autoencoder.R +++ b/R/autoencoder.R @@ -1,420 +1,266 @@ -#' AutoEncoder -#' -#' An S4 Class implementing an Autoencoder -#' -#' Autoencoders are neural networks that try to reproduce their input. Consider -#' this method unstable, as the internals may still be changed. 
-#' -#' @template dimRedMethodSlots -#' -#' @template dimRedMethodGeneralUsage -#' -#' @section Parameters: -#' Autoencoder can take the following parameters: -#' \describe{ -#' \item{ndim}{The number of dimensions for reduction.} -#' \item{n_hidden}{The number of neurons in the hidden -#' layers, the length specifies the number of layers, -#' the length must be impair, the middle number must -#' be the same as ndim.} -#' \item{activation}{The activation functions for the layers, -#' one of "tanh", "sigmoid", "relu", "elu", everything -#' else will silently be ignored and there will be no -#' activation function for the layer.} -#' \item{weight_decay}{the coefficient for weight decay, -#' set to 0 if no weight decay desired.} -#' \item{learning_rate}{The learning rate for gradient descend} -#' \item{graph}{Optional: A list of bits and pieces that define the -#' autoencoder in tensorflow, see details.} -#' \item{keras_graph}{Optional: A list of keras layers that define -#' the encoder and decoder, specifying this, will ignore all -#' other topology related variables, see details.} -#' \item{batchsize}{If NA, all data will be used for training, -#' else only a random subset of size batchsize will be used} -#' \item{n_steps}{the number of training steps.} -#' } -#' -#' @section Details: -#' There are several ways to specify an autoencoder, the simplest is to pass the -#' number of neurons per layer in \code{n_hidden}, this must be a vector of -#' integers of impair length and it must be symmetric and the middle number must -#' be equal to \code{ndim}, For every layer an activation function can be -#' specified with \code{activation}. -#' -#' For regularization weight decay can be specified by setting -#' \code{weight_decay} > 0. -#' -#' Currently only a gradient descent optimizer is used, the learning rate can be -#' specified by setting \code{learning_rate}. -#' The learner can operate on batches if \code{batchsize} is not \code{NA}. -#' The number of steps the learner uses is specified using \code{n_steps}. -#' -#' @section Further training a model: -#' If the model did not converge in the first training phase or training with -#' different data is desired, the \code{\link{dimRedResult}} object may be -#' passed as \code{autoencoder} parameter; In this case all topology related -#' parameters will be ignored. -#' -#' @section Using Keras layers: -#' The encoder and decoder part can be specified using a list of \pkg{keras} -#' layers. This requires a list with two entries, \code{encoder} should contain -#' a LIST of keras layers WITHOUT the \code{\link[keras]{layer_input}} -#' that will be concatenated in order to form the encoder part. -#' \code{decoder} should be -#' defined accordingly, the output of \code{decoder} must have the same number -#' of dimensions as the input data. 
-#' -#' @section Using Tensorflow: -#' The model can be entirely defined in \pkg{tensorflow}, it must contain a -#' list with the following entries: -#' \describe{ -#' \item{encoder}{A tensor that defines the encoder.} -#' \item{decoder}{A tensor that defines the decoder.} -#' \item{network}{A tensor that defines the reconstruction (encoder + decoder).} -#' \item{loss}{A tensor that calculates the loss (network + loss function).} -#' \item{in_data}{A \code{placeholder} that points to the data input of -#' the network AND the encoder.} -#' \item{in_decoder}{A \code{placeholder} that points to the input of -#' the decoder.} -#' \item{session}{A \pkg{tensorflow} \code{Session} object that holds -#' the values of the tensors.} -#' } -#' -#' @section Implementation: -#' Uses \pkg{tensorflow} as a backend, for details an -#' problems relating tensorflow, see \url{https://tensorflow.rstudio.com}. -#' -#' @examples -#' \dontrun{ -#' dat <- loadDataSet("3D S Curve") -#' -#' emb <- embed(dat, "AutoEncoder") -#' -#' # predicting is possible: -#' samp <- sample(floor(nrow(dat) / 10)) -#' emb2 <- embed(dat[samp]) -#' emb3 <- predict(emb2, dat[-samp]) -#' -#' plot(emb, type = "2vars") -#' plot(emb2, type = "2vars") -#' points(getData(emb3)) -#' } -#' -#' @include dimRedResult-class.R -#' @include dimRedMethod-class.R -#' @family dimensionality reduction methods -#' @export AutoEncoder -#' @exportClass AutoEncoder -AutoEncoder <- setClass( - "AutoEncoder", - contains = "dimRedMethod", - prototype = list( - stdpars = list(ndim = 2, - n_hidden = c(10, 2, 10), - activation = c("tanh", "lin", "tanh"), - weight_decay = 0.001, - learning_rate = 0.15, - graph = NULL, - keras_graph = NULL, - ## is.na() of an S4 class gives a warning - autoencoder = NULL, - batchsize = NA, - n_steps = 500), - fun = function (data, pars, - keep.org.data = TRUE) { - chckpkg("tensorflow") - tensorflow::tf$compat$v1$disable_v2_behavior() - - meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL - indata <- data@data - - graph <- - if (!is.null(pars$graph)) { - message("using predefined graph, ", - "ignoring other parameters that define topology, ", - "be sure to set ndim to the correct value ", - "else you might run into trouble.") - pars$graph - } else if (!is.null(pars$autoencoder)) { - message("using predefined autoencoder object, ", - " ignoring other parameters that define topology.") - if (!(inherits(pars$autoencoder, "dimRedResult") && - pars$autoencoder@method == "AutoEncoder")) - stop("autoencoder must be NULL, ", - "or of type dimRedResult by an AutoEncoder object.") - - ## setting topology related parameters from autoencoder - pars$ndim <- pars$autoencoder@pars$ndim - pars$n_hidden <- pars$autoencoder@pars$n_hidden - pars$activation <- pars$autoencoder@pars$activation - - pars$autoencoder@pars$graph - } else if (!is.null(pars$keras_graph)) { - message("using predefined keras graph, ", - "ignoring parameters that define topology") - tmp <- graph_keras(encoder = pars$keras_graph$encoder, - decoder = pars$keras_graph$decoder, - n_in = ncol(indata)) - - pars$ndim <- tmp$encoder$shape$dims[[2]]$value - - tmp - } else { - with(pars, { - graph_params( - d_in = ncol(indata), - n_hidden = n_hidden, - activation = activation, - weight_decay = weight_decay, - learning_rate = learning_rate, - n_steps = n_steps, - ndim = ndim - ) - }) - } - - if (!"encoder" %in% names(graph)) stop("no encoder in graph") - if (!"decoder" %in% names(graph)) stop("no decoder in graph") - if (!"network" %in% names(graph)) stop("no network 
in graph") - if (!"loss" %in% names(graph)) stop("no loss in graph") - if (!"in_decoder" %in% names(graph)) stop("no in_decoder in graph") - if (!"in_data" %in% names(graph)) stop("no in_data in graph") - if (!"session" %in% names(graph)) stop("no session in graph") - - ## TODO: I am not sure if there is a way to do this directly on the list - ## objects - graph_data_input <- graph$in_data - graph_decoder_input <- graph$in_dec - sess <- graph$session - - optimizer <- - tensorflow::tf$compat$v1$train$GradientDescentOptimizer(pars$learning_rate) - train <- optimizer$minimize(graph$loss) - - ## TODO: do proper batching and hold out - for (step in 1:pars$n_steps) { - sess$run(train, feed_dict = - tensorflow::dict( - graph_data_input = - if (is.na(pars$batchsize)) { - indata - } else { - indata[ - sample(seq_len(nrow(indata)), pars$batchsize), - ] - } - ) - ) - } - - outdata <- - sess$run(graph$encoder, - feed_dict = tensorflow::dict(graph_data_input = indata)) - - appl <- function(x) { - appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() - proj <- if (inherits(x, "dimRedData")) x@data else x - - if (ncol(proj) != ncol(data@data)) - stop("x must have the same number of dimensions ", - "as the original data") - - res <- - sess$run(graph$encoder, - feed_dict = tensorflow::dict(graph_data_input = proj)) - - colnames(res) <- paste0("AE", seq_len(ncol(res))) - - new("dimRedData", data = res, meta = appl.meta) - } - - inv <- function(x) { - appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() - proj <- if (inherits(x, "dimRedData")) x@data else x - - if (ncol(proj) != pars$ndim) - stop("x must have the same number of dimensions ", - "as ndim data") - - res <- sess$run( - graph$decoder, - feed_dict = tensorflow::dict( - graph_decoder_input = proj - )) - - colnames(res) <- colnames(indata) - - new("dimRedData", data = res, meta = appl.meta) - } - - ## TODO: this is a hack and there should be an "official" way to save - ## extra data in a dimRedResult object - pars$graph <- graph - - colnames(outdata) <- paste0("AE", seq_len(ncol(outdata))) - - return(new( - "dimRedResult", - data = new("dimRedData", - data = outdata, - meta = meta), - org.data = orgdata, - apply = appl, - inverse = inv, - has.apply = TRUE, - has.inverse = TRUE, - has.org.data = keep.org.data, - method = "AutoEncoder", - pars = pars - )) - }, - requires = c("tensorflow", "keras")) -) - -get_activation_function <- function(x) { - switch( - x, - tanh = tensorflow::tf$compat$v1$tanh, - sigmoid = tensorflow::tf$compat$v1$sigmoid, - relu = tensorflow::tf$compat$v1$nn$relu, - elu = tensorflow::tf$compat$v1$elu, - I - ) -} - -## no idea why these and variants do not work: -## chain_list <- function(x1, x2) Reduce(`%>%`, x2, init = x1) -## chain_list <- function(x) Reduce(`%>%`, x) -chain_list <- function (x1, x2 = NULL) { - - if(is.null(x2)) { - stopifnot(is.list(x1)) - result <- x1[[1]] - if(length(x1) > 1) for (i in 2:length(x1)) { - result <- result %>% (x1[[i]]) - } - } else { - stopifnot(is.list(x2)) - result <- x1 - for (i in 1:length(x2)) { - result <- result %>% (x2[[i]]) - } - } - - return(result) -} - -graph_keras <- function(encoder, decoder, n_in) { - chckpkg("keras") - chckpkg("tensorflow") - - inenc <- keras::layer_input(shape = n_in) - enc <- inenc %>% chain_list(encoder) - - ndim <- enc$shape$dims[[2]]$value - indec <- keras::layer_input(shape = ndim) - dec <- indec %>% chain_list(decoder) - - encdec <- inenc %>% chain_list(encoder) %>% chain_list(decoder) - - ## TODO: check if this uses weight 
decay, probably not: - loss <- tensorflow::tf$compat$v1$reduce_mean((encdec - inenc) ^ 2) - - sess <- tensorflow::tf$compat$v1$keras$backend$get_session() - - return(list( - encoder = enc, - decoder = dec, - network = encdec, - loss = loss, - in_data = inenc, - in_decoder = indec, - session = sess - )) -} - -graph_params <- function ( - d_in, - n_hidden, - activation, - weight_decay, - learning_rate, - n_steps, - ndim -) { - - if (length(n_hidden) %% 2 == 0) - stop("the number of layers must be impair") - if (ndim != n_hidden[ceiling(length(n_hidden) / 2)]) - stop("the middle of n_hidden must be equal to ndim") - if (length(n_hidden) != length(activation)) - stop("declare an activation function for each layer:", - "\nn_hidden: ", paste(n_hidden, collapse = " "), - "\nactivation functions: ", paste(activation, collapse = " ")) - if (weight_decay < 0) - stop("weight decay must be > 0") - if (learning_rate <= 0) - stop("learning rate must be > 0") - if (n_steps <= 0) - stop("n_steps must be > 0") - - input <- tensorflow::tf$compat$v1$placeholder( - "float", shape = tensorflow::shape(NULL, d_in), - name = "input" - ) - indec <- tensorflow::tf$compat$v1$placeholder( - "float", - shape = tensorflow::shape(NULL, ndim), - name = "nlpca" - ) - - w <- lapply(seq_len(length(n_hidden) + 1), function(x) { - n1 <- if (x == 1) d_in else n_hidden[x - 1] - n2 <- if (x > length(n_hidden)) d_in else n_hidden[x] - tensorflow::tf$compat$v1$Variable(tensorflow::tf$compat$v1$random_uniform(tensorflow::shape(n1, n2), 1.0, -1.0), - name = paste0("w_", x)) - }) - b <- lapply(seq_len(length(n_hidden) + 1), function (x) { - n <- if (x > length(n_hidden)) d_in else n_hidden[x] - tensorflow::tf$compat$v1$Variable(tensorflow::tf$compat$v1$zeros(tensorflow::shape(n)), - name = paste0("b_", x)) - }) - - enc <- input - for (i in 1:ceiling(length(n_hidden) / 2)) { - sigma <- get_activation_function(activation[i]) - enc <- sigma(tensorflow::tf$compat$v1$matmul(enc, w[[i]]) + b[[i]]) - } - - dec <- indec - for (i in (ceiling(length(n_hidden) / 2) + 1):(length(n_hidden) + 1)) { - sigma <- get_activation_function(activation[i]) - dec <- sigma(tensorflow::tf$compat$v1$matmul(dec, w[[i]]) + b[[i]]) - } - - encdec <- enc - for (i in (ceiling(length(n_hidden) / 2) + 1):(length(n_hidden) + 1)) { - sigma <- get_activation_function(activation[i]) - encdec <- sigma(tensorflow::tf$compat$v1$matmul(encdec, w[[i]]) + b[[i]]) - } - - loss <- Reduce(`+`, lapply(w, function (x) tensorflow::tf$compat$v1$reduce_sum(tensorflow::tf$compat$v1$pow(x, 2))), 0) - loss <- Reduce(`+`, lapply(b, function (x) tensorflow::tf$compat$v1$reduce_sum(tensorflow::tf$compat$v1$pow(x, 2))), loss) - loss <- tensorflow::tf$compat$v1$reduce_mean((encdec - input) ^ 2) + weight_decay * loss - - sess <- tensorflow::tf$compat$v1$Session() - ## This closes sess if it is garbage collected. - reg.finalizer(sess, function(x) x$close()) - sess$run(tensorflow::tf$compat$v1$global_variables_initializer()) - - return(list( - encoder = enc, - decoder = dec, - network = encdec, - loss = loss, - in_data = input, - in_decoder = indec, - session = sess - )) -} +## #' AutoEncoder +## #' +## #' An S4 Class implementing an Autoencoder +## #' +## #' Autoencoders are neural networks that try to reproduce their input. Consider +## #' this method unstable, as the internals may still be changed. 
+## #' +## #' @template dimRedMethodSlots +## #' +## #' @template dimRedMethodGeneralUsage +## #' +## #' @section Parameters: +## #' Autoencoder can take the following parameters: +## #' \describe{ +## #' \item{ndim}{The number of dimensions for reduction.} +## #' \item{n_hidden}{The number of neurons in the hidden +## #' layers, the length specifies the number of layers, +## #' the length must be impair, the middle number must +## #' be the same as ndim.} +## #' \item{activation}{The activation functions for the layers, +## #' one of "tanh", "sigmoid", "relu", "elu", everything +## #' else will silently be ignored and there will be no +## #' activation function for the layer.} +## #' \item{weight_decay}{the coefficient for weight decay, +## #' set to 0 if no weight decay desired.} +## #' \item{learning_rate}{The learning rate for gradient descend} +## #' \item{graph}{Optional: A list of bits and pieces that define the +## #' autoencoder in tensorflow, see details.} +## #' \item{keras_graph}{Optional: A list of keras layers that define +## #' the encoder and decoder, specifying this, will ignore all +## #' other topology related variables, see details.} +## #' \item{batchsize}{If NA, all data will be used for training, +## #' else only a random subset of size batchsize will be used} +## #' \item{n_steps}{the number of training steps.} +## #' } +## #' +## #' @section Details: +## #' There are several ways to specify an autoencoder, the simplest is to pass the +## #' number of neurons per layer in \code{n_hidden}, this must be a vector of +## #' integers of impair length and it must be symmetric and the middle number must +## #' be equal to \code{ndim}, For every layer an activation function can be +## #' specified with \code{activation}. +## #' +## #' For regularization weight decay can be specified by setting +## #' \code{weight_decay} > 0. +## #' +## #' Currently only a gradient descent optimizer is used, the learning rate can be +## #' specified by setting \code{learning_rate}. +## #' The learner can operate on batches if \code{batchsize} is not \code{NA}. +## #' The number of steps the learner uses is specified using \code{n_steps}. +## #' +## #' @section Further training a model: +## #' If the model did not converge in the first training phase or training with +## #' different data is desired, the \code{\link{dimRedResult}} object may be +## #' passed as \code{autoencoder} parameter; In this case all topology related +## #' parameters will be ignored. +## #' +## #' @section Using Keras layers: +## #' The encoder and decoder part can be specified using a list of \pkg{keras} +## #' layers. This requires a list with two entries, \code{encoder} should contain +## #' a LIST of keras layers WITHOUT the \code{\link[keras]{layer_input}} +## #' that will be concatenated in order to form the encoder part. +## #' \code{decoder} should be +## #' defined accordingly, the output of \code{decoder} must have the same number +## #' of dimensions as the input data. 
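## An illustrative sketch (not part of the package) of what such a list of
## layers might look like, assuming the keras3 layer API used elsewhere in
## this file; the variable name and layer sizes are arbitrary, only the
## requirements stated above (a list with "encoder" and "decoder" entries,
## no layer_input, decoder output matching the number of input columns,
## e.g. 3 for the "3D S Curve" data) are taken from the documentation:
## keras_layers <- list(
##   encoder = list(keras3::layer_dense(units = 10, activation = "tanh"),
##                  keras3::layer_dense(units = 2)),
##   decoder = list(keras3::layer_dense(units = 10, activation = "tanh"),
##                  keras3::layer_dense(units = 3))
## )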
+## #' +## #' @section Using Tensorflow: +## #' The model can be entirely defined in \pkg{tensorflow}, it must contain a +## #' list with the following entries: +## #' \describe{ +## #' \item{encoder}{A tensor that defines the encoder.} +## #' \item{decoder}{A tensor that defines the decoder.} +## #' \item{network}{A tensor that defines the reconstruction (encoder + decoder).} +## #' \item{loss}{A tensor that calculates the loss (network + loss function).} +## #' \item{in_data}{A \code{placeholder} that points to the data input of +## #' the network AND the encoder.} +## #' \item{in_decoder}{A \code{placeholder} that points to the input of +## #' the decoder.} +## #' \item{session}{A \pkg{tensorflow} \code{Session} object that holds +## #' the values of the tensors.} +## #' } +## #' +## #' @section Implementation: +## #' Uses \pkg{tensorflow} as a backend, for details an +## #' problems relating tensorflow, see \url{https://tensorflow.rstudio.com}. +## #' +## #' @examples +## #' \dontrun{ +## #' dat <- loadDataSet("3D S Curve") +## #' +## #' emb <- embed(dat, "AutoEncoder") +## #' +## #' # predicting is possible: +## #' samp <- sample(floor(nrow(dat) / 10)) +## #' emb2 <- embed(dat[samp]) +## #' emb3 <- predict(emb2, dat[-samp]) +## #' +## #' plot(emb, type = "2vars") +## #' plot(emb2, type = "2vars") +## #' points(getData(emb3)) +## #' } +## #' +## #' @include dimRedResult-class.R +## #' @include dimRedMethod-class.R +## #' @family dimensionality reduction methods +## #' @export AutoEncoder +## #' @exportClass AutoEncoder +## AutoEncoder <- setClass( +## "AutoEncoder", +## contains = "dimRedMethod", +## prototype = list( +## stdpars = list(ndim = 2, +## n_hidden = c(10, 2, 10), +## activation = c("tanh", "lin", "tanh"), +## learning_rate = 0.15, +## loss = "mse", +## optimizer = "adam", +## encoder = NULL, +## decoder = NULL, +## ## is.na() of an S4 class gives a warning +## batchsize = 20, +## epochs = 5, +## validation_split = 0.2), + +## fun = function (data, pars, +## keep.org.data = TRUE) { +## chckpkg("keras3") +## indata <- data@data +## indims <- ncol(indata) +## ndim <- pars$ndim + +## input_data <- keras3::layer_input(shape = indims) +## input_hidden <- keras3::layer_input(shape = ndim) + +## ## if the user did not specify encoder and decoder, we build them from the +## ## parameters +## if (is.null(pars$encoder) || is.null(pars$decoder)) { +## depth <- length(pars$n_hidden) +## if (depth %% 2 == 0) { +## stop("the number of layers must be impair") +## } + +## if (ndim != pars$n_hidden[ceiling(depth / 2)]) { +## stop("the middle of n_hidden must be equal to ndim") +## } + +## if (depth != length(pars$activation)) { +## stop("declare an activation function for each layer") +## } + + +## in_depth <- ceiling(depth / 2) +## out_depth <- depth - in_depth + + +## layers <- mapply( +## function(s, n) keras3::layer_dense(units = n, activation = s), +## pars$activation, pars$n_hidden, +## SIMPLIFY = FALSE +## ) + +## encoder <- c(input_data, layers[1:in_depth]) +## decoder <- c(input_hidden, layers[(in_depth + 1):depth]) +## } else { +## encoder <- c(input_data, pars$encoder) +## decoder <- c(input_hidden, pars$decoder) +## } + +## ## now build the actual model, compile it and fit it +## autoencoder <- c(input_data, encoder, decoder) +## encoder <- encoder %>% +## chain_list() %>% +## keras3::keras_model() +## decoder <- decoder %>% +## chain_list() %>% +## keras3::keras_model() +## autoencoder <- autoencoder %>% +## chain_list() %>% +## keras3::keras_model() + +## autoencoder %>% +## 
keras3::compile( +## optimizer = pars$optimizer, +## loss = pars$loss +## ) + +## history <- autoencoder %>% +## keras3::fit( +## indata, indata, +## epochs = pars$epochs, +## batch_size = pars$batchsize, +## validation_split = pars$validation_split, +## verbose = 0 +## ) + +## meta <- data@meta +## orgdata <- if (keep.org.data) data@data else NULL +## indata <- data@data + +## appl <- function(x) { +## appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() +## proj <- if (inherits(x, "dimRedData")) x@data else x + +## if (ncol(proj) != ncol(data@data)) +## stop("x must have the same number of dimensions ", +## "as the original data") + +## res <- encoder(proj) +## colnames(res) <- paste0("AE", seq_len(ncol(res))) +## new("dimRedData", data = res, meta = appl.meta) +## } + +## inv <- function(x) { +## appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() +## proj <- if (inherits(x, "dimRedData")) x@data else x + +## if (ncol(proj) != pars$ndim) +## stop("x must have the same number of dimensions ", +## "as ndim data") + +## res <- decoder(proj) +## colnames(res) <- colnames(indata) +## new("dimRedData", data = res, meta = appl.meta) +## } + +## colnames(outdata) <- paste0("AE", seq_len(ncol(outdata))) + +## return(new( +## "dimRedResult", +## data = new("dimRedData", +## data = outdata, +## meta = meta), +## org.data = orgdata, +## apply = appl, +## inverse = inv, +## has.apply = TRUE, +## has.inverse = TRUE, +## has.org.data = keep.org.data, +## method = "AutoEncoder", +## pars = pars +## )) +## }, +## requires = c("tensorflow", "keras3")) +## ) + +## ## no idea why these and variants do not work: +## ## chain_list <- function(x1, x2) Reduce(`%>%`, x2, init = x1) +## ## chain_list <- function(x) Reduce(`%>%`, x) +## chain_list <- function(x1, x2 = NULL) { + +## if (is.null(x2)) { +## stopifnot(is.list(x1)) +## result <- x1[[1]] +## if (length(x1) > 1) for (i in 2:length(x1)) { +## result <- result %>% (x1[[i]]) +## } +## } else { +## stopifnot(is.list(x2)) +## result <- x1 +## for (i in 1:length(x2)) { +## result <- result %>% (x2[[i]]) +## } +## } + +## return(result) +## } diff --git a/R/drr.R b/R/drr.R index fe5cbaeb..d82735de 100644 --- a/R/drr.R +++ b/R/drr.R @@ -50,7 +50,7 @@ #' \code{kernel.pars}, using less parameter values will speed up #' computation time. Calculation of KRR can be accelerated by #' increasing \code{fastkrr.nblocks}, it should be smaller than -#' n^{1/3} up to sacrificing some accuracy, for details see +#' \eqn{n^{1/3}} up to sacrificing some accuracy, for details see #' \code{\link[DRR]{constructFastKRRLearner}}. 
Another way to speed up #' is to use \code{pars$fastcv = TRUE} which might provide a more #' efficient way to search the parameter space but may also miss the @@ -111,10 +111,13 @@ DRR <- setClass( meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data - res <- do.call(DRR::drr, c(list(X = indata), pars)) + # CVST from DRR complains about non-positive definite matrices + suppressWarnings( + res <- do.call(DRR::drr, c(list(X = indata), pars)) + ) outdata <- res$fitted.data colnames(outdata) <- paste0("DRR", 1:ncol(outdata)) diff --git a/R/fastica.R b/R/fastica.R index db69219d..33614fb4 100644 --- a/R/fastica.R +++ b/R/fastica.R @@ -52,7 +52,7 @@ FastICA <- setClass( chckpkg("fastICA") meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) orgdata.colmeans <- colMeans(orgdata) indata <- data@data diff --git a/R/graph_embed.R b/R/graph_embed.R index 12cab92e..b98d89f9 100644 --- a/R/graph_embed.R +++ b/R/graph_embed.R @@ -60,7 +60,7 @@ KamadaKawai <- setClass( chckpkg("igraph") meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data outdata <- em_graph_layout( diff --git a/R/hlle.R b/R/hlle.R index 77ac4501..1e4dd5c2 100644 --- a/R/hlle.R +++ b/R/hlle.R @@ -116,7 +116,7 @@ HLLE <- setClass( data = new("dimRedData", data = outdata, meta = data@meta), - org.data = if (keep.org.data) data@data else NULL, + org.data = if (keep.org.data) data@data else matrix(0, 0, 0), has.org.data = keep.org.data, method = "HLLE", pars = pars diff --git a/R/isomap.R b/R/isomap.R index 54dcbb42..cf32c751 100644 --- a/R/isomap.R +++ b/R/isomap.R @@ -69,7 +69,7 @@ Isomap <- setClass( chckpkg("RANN") message(Sys.time(), ": Isomap START") meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data if (is.null(pars$eps)) pars$eps <- 0 @@ -195,7 +195,7 @@ makeKNNgraph <- function (x, k, eps = 0, diag = FALSE){ if (diag) as.vector(nn2res$nn.dists) else as.vector(nn2res$nn.dists[, -1]) - return(igraph::as.undirected(g, mode = "collapse", edge.attr.comb = "first")) + return(igraph::as_undirected(g, mode = "collapse", edge.attr.comb = "first")) } ## the original isomap method I'll keep it here for completeness: diff --git a/R/kpca.R b/R/kpca.R index 2e5d0602..bd55985d 100644 --- a/R/kpca.R +++ b/R/kpca.R @@ -65,7 +65,7 @@ kPCA <- setClass( if (is.null(pars$ndim)) pars$ndim <- 2 meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data message(Sys.time(), ": Calculating kernel PCA") diff --git a/R/l1pca.R b/R/l1pca.R index fe51435f..c9515e14 100644 --- a/R/l1pca.R +++ b/R/l1pca.R @@ -68,7 +68,7 @@ PCA_L1 <- setClass( newnames <- paste0("PC", seq_len(ndim)) meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) data <- data@data fun2 <- if(!is.function(pars$fun)) { diff --git a/R/leim.R b/R/leim.R index e643cbf6..e285a12a 100644 --- a/R/leim.R +++ b/R/leim.R @@ -1,149 +1,153 @@ -#' Laplacian Eigenmaps -#' -#' An S4 Class implementing Laplacian Eigenmaps -#' -#' Laplacian Eigenmaps use a kernel and were originally developed to -#' separate non-convex clusters under the name spectral clustering. 
-#' -#' @template dimRedMethodSlots -#' -#' @template dimRedMethodGeneralUsage -#' -#' @section Parameters: -#' \code{LaplacianEigenmaps} can take the following parameters: -#' \describe{ -#' \item{ndim}{the number of output dimensions.} -#' -#' \item{sparse}{A character vector specifying hot to make the graph -#' sparse, \code{"knn"} means that a K-nearest neighbor graph is -#' constructed, \code{"eps"} an epsilon neighborhood graph is -#' constructed, else a dense distance matrix is used.} -#' -#' \item{knn}{The number of nearest neighbors to use for the knn graph.} -#' \item{eps}{The distance for the epsilon neighborhood graph.} -#' -#' \item{t}{Parameter for the transformation of the distance matrix -#' by \eqn{w=exp(-d^2/t)}, larger values give less weight to -#' differences in distance, \code{t == Inf} treats all distances != 0 equally.} -#' \item{norm}{logical, should the normed laplacian be used?} -#' } -#' -#' @section Implementation: -#' Wraps around \code{\link[loe]{spec.emb}}. -#' -#' @references -#' -#' Belkin, M., Niyogi, P., 2003. Laplacian Eigenmaps for -#' Dimensionality Reduction and Data Representation. Neural -#' Computation 15, 1373. -#' -#' @examples -#' if(requireNamespace(c("loe", "RSpectra", "Matrix"), quietly = TRUE)) { -#' -#' dat <- loadDataSet("3D S Curve") -#' emb <- embed(dat, "LaplacianEigenmaps") -#' plot(emb@data@data) -#' -#' } -#' @include dimRedResult-class.R -#' @include dimRedMethod-class.R -#' @export LaplacianEigenmaps -#' @exportClass LaplacianEigenmaps -LaplacianEigenmaps <- setClass( - "LaplacianEigenmaps", - contains = "dimRedMethod", - prototype = list( - stdpars = list(ndim = 2, sparse = "knn", knn = 50, eps = 0.1, - t = Inf, norm = TRUE), - fun = function (data, pars, - keep.org.data = TRUE) { - chckpkg("loe") - chckpkg("RSpectra") - chckpkg("Matrix") +## loe got removed from CRAN - meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL - indata <- data@data - if (is.null(pars$d)) pars$d <- dist - if (is.null(pars$knn)) pars$knn <- 50 - if (is.null(pars$ndim)) pars$ndim <- 2 - if (is.null(pars$t)) pars$t <- Inf - if (is.null(pars$norm)) pars$norm <- TRUE +## ' Laplacian Eigenmaps +## #' +## #' An S4 Class implementing Laplacian Eigenmaps +## #' +## #' Laplacian Eigenmaps use a kernel and were originally developed to +## #' separate non-convex clusters under the name spectral clustering. +## #' +## #' @template dimRedMethodSlots +## #' +## #' @template dimRedMethodGeneralUsage +## #' +## #' @section Parameters: +## #' \code{LaplacianEigenmaps} can take the following parameters: +## #' \describe{ +## #' \item{ndim}{the number of output dimensions.} +## #' +## #' \item{sparse}{A character vector specifying hot to make the graph +## #' sparse, \code{"knn"} means that a K-nearest neighbor graph is +## #' constructed, \code{"eps"} an epsilon neighborhood graph is +## #' constructed, else a dense distance matrix is used.} +## #' +## #' \item{knn}{The number of nearest neighbors to use for the knn graph.} +## #' \item{eps}{The distance for the epsilon neighborhood graph.} +## #' +## #' \item{t}{Parameter for the transformation of the distance matrix +## #' by \eqn{w=exp(-d^2/t)}, larger values give less weight to +## #' differences in distance, \code{t == Inf} treats all distances != 0 equally.} +## #' \item{norm}{logical, should the normed laplacian be used?} +## #' } +## #' +## #' @section Implementation: +## #' Wraps around \code{\link[loe]{spec.emb}}. +## #' +## #' @references +## #' +## #' Belkin, M., Niyogi, P., 2003. 
Laplacian Eigenmaps for +## #' Dimensionality Reduction and Data Representation. Neural +## #' Computation 15, 1373. +## #' +## #' @examples +## #' if(requireNamespace(c("loe", "RSpectra", "Matrix"), quietly = TRUE)) { +## #' +## #' dat <- loadDataSet("3D S Curve") +## #' emb <- embed(dat, "LaplacianEigenmaps") +## #' plot(emb@data@data) +## #' +## #' } +## #' @include dimRedResult-class.R +## #' @include dimRedMethod-class.R +## #' @export LaplacianEigenmaps +## #' @exportClass LaplacianEigenmaps +## LaplacianEigenmaps <- setClass( +## "LaplacianEigenmaps", +## contains = "dimRedMethod", +## prototype = list( +## stdpars = list(ndim = 2, sparse = "knn", knn = 50, eps = 0.1, +## t = Inf, norm = TRUE), +## fun = function (data, pars, +## keep.org.data = TRUE) { +## chckpkg("loe") +## chckpkg("RSpectra") +## chckpkg("Matrix") - message(Sys.time(), ": Creating weight matrix") - W <- if (pars$sparse == "knn") { - knng <- makeKNNgraph(indata, k = pars$knn, eps = 0, - diag = TRUE) - if (is.infinite(pars$t)){ - knng <- igraph::set_edge_attr(knng, name = "weight", value = 1) - } else { - ea <- igraph::edge_attr(knng, name = "weight") - knng <- igraph::set_edge_attr( - knng, name = "weight", value = exp( -(ea ^ 2) / pars$t )) - } - igraph::as_adj(knng, sparse = TRUE, - attr = "weight", type = "both") - } else if (pars$sparse == "eps") { - tmp <- makeEpsSparseMatrix(indata, pars$eps) - tmp@x <- if (is.infinite(pars$t)) rep(1, length(tmp@i)) - else exp(- (tmp@x ^ 2) / pars$t) - ## diag(tmp) <- 1 - as(tmp, "dgCMatrix") - } else { # dense case - tmp <- dist(indata) - tmp[] <- if (is.infinite(pars$t)) 1 - else exp( -(tmp ^ 2) / pars$t) - tmp <- as.matrix(tmp) - diag(tmp) <- 1 - tmp - } +## meta <- data@meta +## orgdata <- if (keep.org.data) data@data else NULL +## indata <- data@data - ## we don't need to test for symmetry, because we know the - ## matrix is symmetric - D <- Matrix::Diagonal(x = Matrix::rowSums(W)) - L <- D - W - ## for the generalized eigenvalue problem, we do not have a solver - ## use A u = \lambda B u - ## Lgen <- Matrix::Diagonal(x = 1 / Matrix::diag(D) ) %*% L - ## but then we get negative eigenvalues and complex eigenvalues - Lgen <- L - message(Sys.time(), ": Eigenvalue decomposition") - outdata <- if (pars$norm) { - DS <- Matrix::Diagonal(x = 1 / sqrt(Matrix::diag(D))) - RSpectra::eigs_sym(DS %*% Lgen %*% DS, - k = pars$ndim + 1, - sigma = -1e-5) - } else { - RSpectra::eigs_sym(Lgen, - k = pars$ndim + 1, - sigma = -1e-5) - } - message("Eigenvalues: ", paste(format(outdata$values), - collapse = " ")) - ## The eigenvalues are in decreasing order and we remove the - ## smallest, which should be approx 0: - outdata <- outdata$vectors[, order(outdata$values)[-1], - drop = FALSE] +## if (is.null(pars$d)) pars$d <- dist +## if (is.null(pars$knn)) pars$knn <- 50 +## if (is.null(pars$ndim)) pars$ndim <- 2 +## if (is.null(pars$t)) pars$t <- Inf +## if (is.null(pars$norm)) pars$norm <- TRUE - if(ncol(outdata) > 0) { - colnames(outdata) <- paste0("LEIM", seq_len(ncol(outdata))) - } else { - warning("no dimensions left, this is probably due to a badly conditioned eigenvalue decomposition.") - } - message(Sys.time(), ": DONE") - return(new( - "dimRedResult", - data = new("dimRedData", - data = outdata, - meta = meta), - org.data = orgdata, - has.org.data = keep.org.data, - method = "leim", - pars = pars - )) - }, - requires = c("loe", "RSpectra", "Matrix")) -) +## message(Sys.time(), ": Creating weight matrix") +## W <- if (pars$sparse == "knn") { +## knng <- makeKNNgraph(indata, k = 
pars$knn, eps = 0, +## diag = TRUE) +## if (is.infinite(pars$t)){ +## knng <- igraph::set_edge_attr(knng, name = "weight", value = 1) +## } else { +## ea <- igraph::edge_attr(knng, name = "weight") +## knng <- igraph::set_edge_attr( +## knng, name = "weight", value = exp( -(ea ^ 2) / pars$t )) +## } +## igraph::as_adj(knng, sparse = TRUE, +## attr = "weight", type = "both") +## } else if (pars$sparse == "eps") { +## tmp <- makeEpsSparseMatrix(indata, pars$eps) +## tmp@x <- if (is.infinite(pars$t)) rep(1, length(tmp@i)) +## else exp(- (tmp@x ^ 2) / pars$t) +## ## diag(tmp) <- 1 +## as(tmp, "dgCMatrix") +## } else { # dense case +## tmp <- dist(indata) +## tmp[] <- if (is.infinite(pars$t)) 1 +## else exp( -(tmp ^ 2) / pars$t) +## tmp <- as.matrix(tmp) +## diag(tmp) <- 1 +## tmp +## } + +## ## we don't need to test for symmetry, because we know the +## ## matrix is symmetric +## D <- Matrix::Diagonal(x = Matrix::rowSums(W)) +## L <- D - W +## ## for the generalized eigenvalue problem, we do not have a solver +## ## use A u = \lambda B u +## ## Lgen <- Matrix::Diagonal(x = 1 / Matrix::diag(D) ) %*% L +## ## but then we get negative eigenvalues and complex eigenvalues +## Lgen <- L +## message(Sys.time(), ": Eigenvalue decomposition") +## outdata <- if (pars$norm) { +## DS <- Matrix::Diagonal(x = 1 / sqrt(Matrix::diag(D))) +## RSpectra::eigs_sym(DS %*% Lgen %*% DS, +## k = pars$ndim + 1, +## sigma = -1e-5) +## } else { +## RSpectra::eigs_sym(Lgen, +## k = pars$ndim + 1, +## sigma = -1e-5) +## } +## message("Eigenvalues: ", paste(format(outdata$values), +## collapse = " ")) +## ## The eigenvalues are in decreasing order and we remove the +## ## smallest, which should be approx 0: +## outdata <- outdata$vectors[, order(outdata$values)[-1], +## drop = FALSE] + +## if(ncol(outdata) > 0) { +## colnames(outdata) <- paste0("LEIM", seq_len(ncol(outdata))) +## } else { +## warning("no dimensions left, this is probably due to a badly conditioned eigenvalue decomposition.") +## } + +## message(Sys.time(), ": DONE") +## return(new( +## "dimRedResult", +## data = new("dimRedData", +## data = outdata, +## meta = meta), +## org.data = orgdata, +## has.org.data = keep.org.data, +## method = "leim", +## pars = pars +## )) +## }, +## requires = c("loe", "RSpectra", "Matrix")) +## ) diff --git a/R/mds.R b/R/mds.R index 8aede204..387c603c 100644 --- a/R/mds.R +++ b/R/mds.R @@ -57,7 +57,7 @@ MDS <- setClass( keep.org.data = TRUE) { ## meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data ## there are only efficient implementations for euclidean diff --git a/R/nmds.R b/R/nmds.R index fe16f84e..4e406611 100644 --- a/R/nmds.R +++ b/R/nmds.R @@ -48,7 +48,7 @@ nMDS <- setClass( chckpkg("vegan") meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data outdata <- vegan::monoMDS(pars$d(indata), k = pars$ndim)$points diff --git a/R/nnmf.R b/R/nnmf.R index 6ecf9c96..62b0f709 100644 --- a/R/nnmf.R +++ b/R/nnmf.R @@ -71,7 +71,7 @@ NNMF <- setClass( ## require("NMF") meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) data <- data@data if (!is.matrix(data)) data <- as.matrix(data) diff --git a/R/pca.R b/R/pca.R index f88f32dd..8fc53df7 100644 --- a/R/pca.R +++ b/R/pca.R @@ -59,7 +59,7 @@ PCA <- setClass( pars$ndim <- NULL meta <- data@meta - orgdata <- if 
(keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) data <- data@data res <- do.call( prcomp, diff --git a/R/rotate.R b/R/rotate.R index fc6588c6..c2c3ba35 100644 --- a/R/rotate.R +++ b/R/rotate.R @@ -87,17 +87,23 @@ setMethod( if (ncol(planes) == 0) break - o <- optimx::optimx( - par = rep(0, nplanes), - fn = obj, - method = "L-BFGS-B", - lower = 0, - upper = 2 * pi, - X = as.matrix(X), - Y = as.matrix(Y), - axis = axis, - without_axes = without_axes, - cor_method = cor_method + # optimx throws a warning that we have to suppress: + ## Warning messages: + ## 1: In max(logpar) : no non-missing arguments to max; returning -Inf + ## 2: In min(logpar) : no non-missing arguments to min; returning Inf + suppressWarnings( + o <- optimx::optimx( + par = rep(0, nplanes), + fn = obj, + method = "L-BFGS-B", + lower = 0, + upper = 2 * pi, + X = as.matrix(X), + Y = as.matrix(Y), + axis = axis, + without_axes = without_axes, + cor_method = cor_method + ) ) ## The result looks like this: ## p1 value fevals gevals niter convcode kkt1 kkt2 xtimes diff --git a/R/tsne.R b/R/tsne.R index 9f0fc10f..a4c83673 100644 --- a/R/tsne.R +++ b/R/tsne.R @@ -59,7 +59,7 @@ tSNE <- setClass( chckpkg("Rtsne") meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data outdata <- Rtsne::Rtsne(pars$d(indata), diff --git a/R/umap.R b/R/umap.R index a78290ea..da53b436 100644 --- a/R/umap.R +++ b/R/umap.R @@ -81,7 +81,7 @@ UMAP <- setClass( } meta <- data@meta - orgdata <- if (keep.org.data) data@data else NULL + orgdata <- if (keep.org.data) data@data else matrix(0, 0, 0) indata <- data@data ## Create config diff --git a/checkpackage.sh b/checkpackage.sh index 729c8a5d..9d063937 100755 --- a/checkpackage.sh +++ b/checkpackage.sh @@ -2,8 +2,9 @@ echo "== START =============================================" +R_HOME= # R_FOLDER=/usr/bin -R_FOLDER=$HOME/progs/R/R-4.2.1/bin +R_FOLDER=$HOME/progs/R/R-4.4.2/bin # R_FOLDER=$HOME/progs/R/R-devel/bin $R_FOLDER/R --version diff --git a/man/AutoEncoder-class.Rd b/man/AutoEncoder-class.Rd deleted file mode 100644 index cabe290e..00000000 --- a/man/AutoEncoder-class.Rd +++ /dev/null @@ -1,160 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/autoencoder.R -\docType{class} -\name{AutoEncoder-class} -\alias{AutoEncoder-class} -\alias{AutoEncoder} -\title{AutoEncoder} -\description{ -An S4 Class implementing an Autoencoder -} -\details{ -Autoencoders are neural networks that try to reproduce their input. Consider -this method unstable, as the internals may still be changed. -} -\section{Slots}{ - -\describe{ -\item{\code{fun}}{A function that does the embedding and returns a -dimRedResult object.} - -\item{\code{stdpars}}{The standard parameters for the function.} -}} - -\section{General usage}{ - -Dimensionality reduction methods are S4 Classes that either be used -directly, in which case they have to be initialized and a full -list with parameters has to be handed to the \code{@fun()} -slot, or the method name be passed to the embed function and -parameters can be given to the \code{...}, in which case -missing parameters will be replaced by the ones in the -\code{@stdpars}. 
-} - -\section{Parameters}{ - -Autoencoder can take the following parameters: -\describe{ - \item{ndim}{The number of dimensions for reduction.} - \item{n_hidden}{The number of neurons in the hidden - layers, the length specifies the number of layers, - the length must be impair, the middle number must - be the same as ndim.} - \item{activation}{The activation functions for the layers, - one of "tanh", "sigmoid", "relu", "elu", everything - else will silently be ignored and there will be no - activation function for the layer.} - \item{weight_decay}{the coefficient for weight decay, - set to 0 if no weight decay desired.} - \item{learning_rate}{The learning rate for gradient descend} - \item{graph}{Optional: A list of bits and pieces that define the - autoencoder in tensorflow, see details.} - \item{keras_graph}{Optional: A list of keras layers that define - the encoder and decoder, specifying this, will ignore all - other topology related variables, see details.} - \item{batchsize}{If NA, all data will be used for training, - else only a random subset of size batchsize will be used} - \item{n_steps}{the number of training steps.} -} -} - -\section{Details}{ - -There are several ways to specify an autoencoder, the simplest is to pass the -number of neurons per layer in \code{n_hidden}, this must be a vector of -integers of impair length and it must be symmetric and the middle number must -be equal to \code{ndim}, For every layer an activation function can be -specified with \code{activation}. - -For regularization weight decay can be specified by setting -\code{weight_decay} > 0. - -Currently only a gradient descent optimizer is used, the learning rate can be -specified by setting \code{learning_rate}. -The learner can operate on batches if \code{batchsize} is not \code{NA}. -The number of steps the learner uses is specified using \code{n_steps}. -} - -\section{Further training a model}{ - -If the model did not converge in the first training phase or training with -different data is desired, the \code{\link{dimRedResult}} object may be -passed as \code{autoencoder} parameter; In this case all topology related -parameters will be ignored. -} - -\section{Using Keras layers}{ - -The encoder and decoder part can be specified using a list of \pkg{keras} -layers. This requires a list with two entries, \code{encoder} should contain -a LIST of keras layers WITHOUT the \code{\link[keras]{layer_input}} -that will be concatenated in order to form the encoder part. -\code{decoder} should be -defined accordingly, the output of \code{decoder} must have the same number -of dimensions as the input data. -} - -\section{Using Tensorflow}{ - -The model can be entirely defined in \pkg{tensorflow}, it must contain a -list with the following entries: -\describe{ - \item{encoder}{A tensor that defines the encoder.} - \item{decoder}{A tensor that defines the decoder.} - \item{network}{A tensor that defines the reconstruction (encoder + decoder).} - \item{loss}{A tensor that calculates the loss (network + loss function).} - \item{in_data}{A \code{placeholder} that points to the data input of - the network AND the encoder.} - \item{in_decoder}{A \code{placeholder} that points to the input of - the decoder.} - \item{session}{A \pkg{tensorflow} \code{Session} object that holds - the values of the tensors.} -} -} - -\section{Implementation}{ - -Uses \pkg{tensorflow} as a backend, for details an - problems relating tensorflow, see \url{https://tensorflow.rstudio.com}. 
-} - -\examples{ -\dontrun{ -dat <- loadDataSet("3D S Curve") - -emb <- embed(dat, "AutoEncoder") - -# predicting is possible: -samp <- sample(floor(nrow(dat) / 10)) -emb2 <- embed(dat[samp]) -emb3 <- predict(emb2, dat[-samp]) - -plot(emb, type = "2vars") -plot(emb2, type = "2vars") -points(getData(emb3)) -} - -} -\seealso{ -Other dimensionality reduction methods: -\code{\link{DRR-class}}, -\code{\link{DiffusionMaps-class}}, -\code{\link{DrL-class}}, -\code{\link{FastICA-class}}, -\code{\link{FruchtermanReingold-class}}, -\code{\link{HLLE-class}}, -\code{\link{Isomap-class}}, -\code{\link{KamadaKawai-class}}, -\code{\link{MDS-class}}, -\code{\link{NNMF-class}}, -\code{\link{PCA-class}}, -\code{\link{PCA_L1-class}}, -\code{\link{UMAP-class}}, -\code{\link{dimRedMethod-class}}, -\code{\link{dimRedMethodList}()}, -\code{\link{kPCA-class}}, -\code{\link{nMDS-class}}, -\code{\link{tSNE-class}} -} -\concept{dimensionality reduction methods} diff --git a/man/DRR-class.Rd b/man/DRR-class.Rd index c45e950a..6dd89a1c 100644 --- a/man/DRR-class.Rd +++ b/man/DRR-class.Rd @@ -76,7 +76,7 @@ over all parameter combinations of \code{lambda} and \code{kernel.pars}, using less parameter values will speed up computation time. Calculation of KRR can be accelerated by increasing \code{fastkrr.nblocks}, it should be smaller than -n^{1/3} up to sacrificing some accuracy, for details see +\eqn{n^{1/3}} up to sacrificing some accuracy, for details see \code{\link[DRR]{constructFastKRRLearner}}. Another way to speed up is to use \code{pars$fastcv = TRUE} which might provide a more efficient way to search the parameter space but may also miss the @@ -110,7 +110,6 @@ Laparra, V., Malo, J., Camps-Valls, G., } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, diff --git a/man/DiffusionMaps-class.Rd b/man/DiffusionMaps-class.Rd index a4debc07..d8a5fae0 100644 --- a/man/DiffusionMaps-class.Rd +++ b/man/DiffusionMaps-class.Rd @@ -90,7 +90,6 @@ Coifman, R.R., Lafon, S., 2006. Diffusion maps. Applied and } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, diff --git a/man/DrL-class.Rd b/man/DrL-class.Rd index 2aa46765..0b5c189c 100644 --- a/man/DrL-class.Rd +++ b/man/DrL-class.Rd @@ -69,7 +69,6 @@ Martin, S., Brown, W.M., Wylie, B.N., 2007. 
Dr.l: Distributed Recursive } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{FastICA-class}}, diff --git a/man/FastICA-class.Rd b/man/FastICA-class.Rd index b9e84e61..24868240 100644 --- a/man/FastICA-class.Rd +++ b/man/FastICA-class.Rd @@ -65,7 +65,6 @@ https://doi.org/10.1109/72.761722 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/FruchtermanReingold-class.Rd b/man/FruchtermanReingold-class.Rd index 5288e9f1..e48f54d5 100644 --- a/man/FruchtermanReingold-class.Rd +++ b/man/FruchtermanReingold-class.Rd @@ -62,7 +62,6 @@ https://doi.org/10.1002/spe.4380211102 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/HLLE-class.Rd b/man/HLLE-class.Rd index 48d85c11..9ee187f7 100644 --- a/man/HLLE-class.Rd +++ b/man/HLLE-class.Rd @@ -64,7 +64,6 @@ embedding techniques for high-dimensional data. PNAS 100, } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/Isomap-class.Rd b/man/Isomap-class.Rd index eab38ce0..2036db8b 100644 --- a/man/Isomap-class.Rd +++ b/man/Isomap-class.Rd @@ -80,7 +80,6 @@ https://doi.org/10.1126/science.290.5500.2319 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/KamadaKawai-class.Rd b/man/KamadaKawai-class.Rd index 692fc60d..f920c2b6 100644 --- a/man/KamadaKawai-class.Rd +++ b/man/KamadaKawai-class.Rd @@ -72,7 +72,6 @@ https://doi.org/10.1016/0020-0190(89)90102-6 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/LaplacianEigenmaps-class.Rd b/man/LaplacianEigenmaps-class.Rd deleted file mode 100644 index f36e4909..00000000 --- a/man/LaplacianEigenmaps-class.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/leim.R -\docType{class} -\name{LaplacianEigenmaps-class} -\alias{LaplacianEigenmaps-class} -\alias{LaplacianEigenmaps} -\title{Laplacian Eigenmaps} -\description{ -An S4 Class implementing Laplacian Eigenmaps -} -\details{ -Laplacian Eigenmaps use a kernel and were originally developed to -separate non-convex clusters under the name spectral clustering. -} -\section{Slots}{ - -\describe{ -\item{\code{fun}}{A function that does the embedding and returns a -dimRedResult object.} - -\item{\code{stdpars}}{The standard parameters for the function.} -}} - -\section{General usage}{ - -Dimensionality reduction methods are S4 Classes that either be used -directly, in which case they have to be initialized and a full -list with parameters has to be handed to the \code{@fun()} -slot, or the method name be passed to the embed function and -parameters can be given to the \code{...}, in which case -missing parameters will be replaced by the ones in the -\code{@stdpars}. 
-} - -\section{Parameters}{ - -\code{LaplacianEigenmaps} can take the following parameters: -\describe{ - \item{ndim}{the number of output dimensions.} - - \item{sparse}{A character vector specifying hot to make the graph - sparse, \code{"knn"} means that a K-nearest neighbor graph is - constructed, \code{"eps"} an epsilon neighborhood graph is - constructed, else a dense distance matrix is used.} - - \item{knn}{The number of nearest neighbors to use for the knn graph.} - \item{eps}{The distance for the epsilon neighborhood graph.} - - \item{t}{Parameter for the transformation of the distance matrix - by \eqn{w=exp(-d^2/t)}, larger values give less weight to - differences in distance, \code{t == Inf} treats all distances != 0 equally.} - \item{norm}{logical, should the normed laplacian be used?} -} -} - -\section{Implementation}{ - -Wraps around \code{\link[loe]{spec.emb}}. -} - -\examples{ -if(requireNamespace(c("loe", "RSpectra", "Matrix"), quietly = TRUE)) { - -dat <- loadDataSet("3D S Curve") -emb <- embed(dat, "LaplacianEigenmaps") -plot(emb@data@data) - -} -} -\references{ -Belkin, M., Niyogi, P., 2003. Laplacian Eigenmaps for -Dimensionality Reduction and Data Representation. Neural -Computation 15, 1373. -} diff --git a/man/MDS-class.Rd b/man/MDS-class.Rd index 167d2724..f8fb960b 100644 --- a/man/MDS-class.Rd +++ b/man/MDS-class.Rd @@ -72,7 +72,6 @@ Psychometrika 17, 401-419. https://doi.org/10.1007/BF02288916 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/NNMF-class.Rd b/man/NNMF-class.Rd index 9530a402..1fcf2753 100644 --- a/man/NNMF-class.Rd +++ b/man/NNMF-class.Rd @@ -79,7 +79,6 @@ matrix factorization. Nature 401, 788-791. https://doi.org/10.1038/44565 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/PCA-class.Rd b/man/PCA-class.Rd index 5f1768dd..d8a30863 100644 --- a/man/PCA-class.Rd +++ b/man/PCA-class.Rd @@ -70,7 +70,6 @@ space. Philosophical Magazine 2, 559-572. } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/PCA_L1-class.Rd b/man/PCA_L1-class.Rd index 3c08b2dd..b4a3bfe6 100644 --- a/man/PCA_L1-class.Rd +++ b/man/PCA_L1-class.Rd @@ -73,7 +73,6 @@ Algorithms for L1-Norm Principal Component Analysis, in: Data Mining (ICDM), } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/UMAP-class.Rd b/man/UMAP-class.Rd index 00b3354d..5fbefd6f 100644 --- a/man/UMAP-class.Rd +++ b/man/UMAP-class.Rd @@ -85,7 +85,6 @@ https://arxiv.org/abs/1802.03426 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/dimRedMethod-class.Rd b/man/dimRedMethod-class.Rd index cea74b13..71e775f4 100644 --- a/man/dimRedMethod-class.Rd +++ b/man/dimRedMethod-class.Rd @@ -41,7 +41,6 @@ Tensorflow. 
Used to auto skip tests} \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/dimRedMethodList.Rd b/man/dimRedMethodList.Rd index 1215485e..990d4f72 100644 --- a/man/dimRedMethodList.Rd +++ b/man/dimRedMethodList.Rd @@ -26,7 +26,6 @@ dimRedMethodList() } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/kPCA-class.Rd b/man/kPCA-class.Rd index 771f6ea1..266df5fd 100644 --- a/man/kPCA-class.Rd +++ b/man/kPCA-class.Rd @@ -73,7 +73,6 @@ https://doi.org/10.1162/089976698300017467 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/nMDS-class.Rd b/man/nMDS-class.Rd index e1c2c220..68987c1c 100644 --- a/man/nMDS-class.Rd +++ b/man/nMDS-class.Rd @@ -62,7 +62,6 @@ Psychometrika 29, 115-129. https://doi.org/10.1007/BF02289694 } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/man/tSNE-class.Rd b/man/tSNE-class.Rd index ddf78a84..5f41728f 100644 --- a/man/tSNE-class.Rd +++ b/man/tSNE-class.Rd @@ -71,7 +71,6 @@ t-SNE. J. Mach. Learn. Res. 9, 2579-2605. } \seealso{ Other dimensionality reduction methods: -\code{\link{AutoEncoder-class}}, \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, diff --git a/tests/testthat/test_NNMF.R b/tests/testthat/test_NNMF.R index 023c6f44..6dbfe73a 100644 --- a/tests/testthat/test_NNMF.R +++ b/tests/testthat/test_NNMF.R @@ -62,6 +62,7 @@ test_that("2D projection", { test_that("other arguments", { skip_if_no_NMF() + skip_on_cran() dim_3_args <- embed(input_trn, "NNMF", seed = 13, nrun = 10, ndim = 3, method = "KL", diff --git a/tests/testthat/test_autoencoder.R b/tests/testthat/test_autoencoder.R index 1f3ae1c2..7a5d17c0 100644 --- a/tests/testthat/test_autoencoder.R +++ b/tests/testthat/test_autoencoder.R @@ -1,215 +1,215 @@ -skip_if_no_tensorflow <- function() { - if (!requireNamespace(c("tensorflow", "reticulate"), quietly = TRUE) || - (!reticulate::py_module_available("tensorflow") && - Sys.getenv("BNET_FORCE_AUTOENCODER_TESTS") != "1")) - skip("TensorFlow not available for testing") -} -skip_if_no_keras <- function() { - if (!requireNamespace(c("keras", "reticulate"), quietly = TRUE) || - (!keras::is_keras_available() && - Sys.getenv("BNET_FORCE_AUTOENCODER_TESTS") != "1")) - skip("Keras not available for testing") -} - -test_that("Check if tensorflow is installed correctly.", { - skip_if_no_tensorflow() - requireNamespace("tensorflow", quietly = TRUE) - tensorflow::tf$compat$v1$disable_v2_behavior() - # I have not found a way to suppress the warning tf gives on first use. - sess <- tensorflow::tf$compat$v1$Session() - hello <- "Hello, TensorFlow!" 
- tf_hello <- tensorflow::tf$compat$v1$constant(hello) - tf_hello_res <- sess$run(tf_hello) - - # in python 3 this returns a `bytes` object $decode() transforms it into a - # sting, in python 2 this is a simple string - if(!is.character(tf_hello_res)) - tf_hello_res <- tf_hello_res$decode() - - ## print("tf_hello_res:") - ## print(str(tf_hello_res)) - ## print(tf_hello_res) - - expect(tf_hello_res == hello, paste("tensorflow does not work:\n", - "hello =", hello, "\n", - "sess$run(tf_hello) =", tf_hello_res)) -}) - -test_that("Check errors when building autoencoder.", { - skip_if_no_tensorflow() - iris_data <- as(iris[, 1:4], "dimRedData") - expect_error(embed(iris_data, "AutoEncoder", activation = "sigmoid"), - "declare an activation function for each layer") - expect_error(embed(iris_data, "AutoEncoder", n_hidden = c(1, 2, 2, 1)), - "the number of layers must be impair") - expect_error(embed(iris_data, "AutoEncoder", weight_decay = -1), - "weight decay must be > 0") - expect_error(embed(iris_data, "AutoEncoder", learning_rate = -1), - "learning rate must be > 0") - expect_error(embed(iris_data, "AutoEncoder", n_steps = -1), - "n_steps must be > 0") - expect_error(embed(iris_data, "AutoEncoder", n_hidden = c(4, 2, 4), ndim = 3), - "the middle of n_hidden must be equal to ndim") -}) - - -test_that("using autoencoder with parameters", { - skip_if_no_tensorflow() - iris_data <- as(iris[, 1:4], "dimRedData") - expect_equal(class(iris_data)[1], "dimRedData") - - ae <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder", - n_hidden = c(10, x, 10), - ndim = x, - n_steps = 100)) - aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) - lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) - - ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") - ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") - ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") - - lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) - - - ae <- lapply(1:2, function(x) embed(iris_data, - "AutoEncoder", - n_hidden = c(10, x, 10), - ndim = x, - weight_decay = 0.1, - n_steps = 100)) - aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) - lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) - - ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") - ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") - ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") - - lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) - - - - ae <- lapply(1:2, function(x) embed(iris_data, - "AutoEncoder", - n_hidden = c(10, x, 10), - ndim = x, - learning_rate = 0.1, - weight_decay = 0.1, - n_steps = 100)) - aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) - lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) - - ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") - ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") - ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") - - lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) - - - - ae <- lapply(1:2, function(x) embed(iris_data, - "AutoEncoder", - n_hidden = c(10, x, 10), - activation = c("sigmoid", "sigmoid", "sigmoid"), - ndim = x, - learning_rate = 0.1, - weight_decay = 0.1, - n_steps = 100)) - aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) - 
lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) - - aa <- lapply(c("tanh", "sigmoid", "relu", "elu"), - function(x) embed(iris_data, - "AutoEncoder", - n_hidden = c(10, 2, 10), - activation = c("sigmoid", "sigmoid", "sigmoid"), - ndim = 2, - learning_rate = 0.1, - weight_decay = 0.1, - n_steps = 100)) - aaq <- lapply(aa, function(x) quality(x, "reconstruction_rmse")) - lapply(aa, function(x) expect_s4_class(x, "dimRedResult")) - ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") - ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") - ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") - - lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) -}) - -test_that("using autoencoder with autoencoder results", { - skip_if_no_tensorflow() - - tensorflow::tf$compat$v1$set_random_seed(2) - iris_data <- as(iris[, 1:4], "dimRedData") - expect_equal(class(iris_data)[1], "dimRedData") - - ae1 <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder", - n_hidden = c(10, x, 10), - ndim = x, n_steps = 1)) - aq1 <- lapply(ae1, function(x) quality(x, "reconstruction_rmse")) - - ae2 <- lapply(ae1, function(x) embed(iris_data, "AutoEncoder", - autoencoder = x, n_steps = 1000)) - aq2 <- lapply(ae2, function(x) quality(x, "reconstruction_rmse")) +## skip_if_no_tensorflow <- function() { +## if (!requireNamespace(c("tensorflow", "reticulate"), quietly = TRUE) || +## (!reticulate::py_module_available("tensorflow") && +## Sys.getenv("BNET_FORCE_AUTOENCODER_TESTS") != "1")) +## skip("TensorFlow not available for testing") +## } +## skip_if_no_keras <- function() { +## if (!requireNamespace(c("keras3", "reticulate"), quietly = TRUE) || +## (!keras::is_keras_available() && +## Sys.getenv("BNET_FORCE_AUTOENCODER_TESTS") != "1")) +## skip("Keras not available for testing") +## } + +## test_that("Check if tensorflow is installed correctly.", { +## skip_if_no_tensorflow() +## requireNamespace("tensorflow", quietly = TRUE) +## tensorflow::tf$compat$v1$disable_v2_behavior() +## # I have not found a way to suppress the warning tf gives on first use. +## sess <- tensorflow::tf$compat$v1$Session() +## hello <- "Hello, TensorFlow!" 
+## tf_hello <- tensorflow::tf$compat$v1$constant(hello) +## tf_hello_res <- sess$run(tf_hello) + +## # in python 3 this returns a `bytes` object $decode() transforms it into a +## # sting, in python 2 this is a simple string +## if(!is.character(tf_hello_res)) +## tf_hello_res <- tf_hello_res$decode() + +## ## print("tf_hello_res:") +## ## print(str(tf_hello_res)) +## ## print(tf_hello_res) + +## expect(tf_hello_res == hello, paste("tensorflow does not work:\n", +## "hello =", hello, "\n", +## "sess$run(tf_hello) =", tf_hello_res)) +## }) + +## test_that("Check errors when building autoencoder.", { +## skip_if_no_tensorflow() +## iris_data <- as(iris[, 1:4], "dimRedData") +## expect_error(embed(iris_data, "AutoEncoder", activation = "sigmoid"), +## "declare an activation function for each layer") +## expect_error(embed(iris_data, "AutoEncoder", n_hidden = c(1, 2, 2, 1)), +## "the number of layers must be impair") +## expect_error(embed(iris_data, "AutoEncoder", weight_decay = -1), +## "weight decay must be > 0") +## expect_error(embed(iris_data, "AutoEncoder", learning_rate = -1), +## "learning rate must be > 0") +## expect_error(embed(iris_data, "AutoEncoder", n_steps = -1), +## "n_steps must be > 0") +## expect_error(embed(iris_data, "AutoEncoder", n_hidden = c(4, 2, 4), ndim = 3), +## "the middle of n_hidden must be equal to ndim") +## }) - lapply(ae1, function(x) expect_s4_class(x, "dimRedResult")) - lapply(ae2, function(x) expect_s4_class(x, "dimRedResult")) - - expect(aq1[[1]] > aq2[[1]], "the error should decrease with more steps") - expect(aq1[[2]] > aq2[[2]], "the error should decrease with more steps") - ## expect(aq1[[3]] > aq2[[3]], "the error should decrease with more steps") - ## expect(aq1[[4]] > aq2[[4]], "the error should decrease with more steps") - - lapply(1:length(ae1), function(x) expect_equal(x, getNDim(ae1[[x]]))) - lapply(1:length(ae2), function(x) expect_equal(x, getNDim(ae2[[x]]))) -}) -test_that("using autoencoder with keras", { - skip_if_no_tensorflow() - skip_if_no_keras() +## test_that("using autoencoder with parameters", { +## skip_if_no_tensorflow() +## iris_data <- as(iris[, 1:4], "dimRedData") +## expect_equal(class(iris_data)[1], "dimRedData") + +## ae <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder", +## n_hidden = c(10, x, 10), +## ndim = x, +## n_steps = 100)) +## aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) +## lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) + +## ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") +## ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") +## ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") + +## lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) + + +## ae <- lapply(1:2, function(x) embed(iris_data, +## "AutoEncoder", +## n_hidden = c(10, x, 10), +## ndim = x, +## weight_decay = 0.1, +## n_steps = 100)) +## aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) +## lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) + +## ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") +## ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") +## ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") + +## lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) + + + +## ae <- lapply(1:2, function(x) embed(iris_data, +## "AutoEncoder", +## n_hidden = c(10, x, 10), +## ndim = x, +## 
learning_rate = 0.1, +## weight_decay = 0.1, +## n_steps = 100)) +## aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) +## lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) + +## ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") +## ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") +## ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") + +## lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) + + + +## ae <- lapply(1:2, function(x) embed(iris_data, +## "AutoEncoder", +## n_hidden = c(10, x, 10), +## activation = c("sigmoid", "sigmoid", "sigmoid"), +## ndim = x, +## learning_rate = 0.1, +## weight_decay = 0.1, +## n_steps = 100)) +## aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse")) +## lapply(ae, function(x) expect_s4_class(x, "dimRedResult")) + +## aa <- lapply(c("tanh", "sigmoid", "relu", "elu"), +## function(x) embed(iris_data, +## "AutoEncoder", +## n_hidden = c(10, 2, 10), +## activation = c("sigmoid", "sigmoid", "sigmoid"), +## ndim = 2, +## learning_rate = 0.1, +## weight_decay = 0.1, +## n_steps = 100)) +## aaq <- lapply(aa, function(x) quality(x, "reconstruction_rmse")) +## lapply(aa, function(x) expect_s4_class(x, "dimRedResult")) +## ## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions") +## ## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions") +## ## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions") + +## lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]]))) +## }) + +## test_that("using autoencoder with autoencoder results", { +## skip_if_no_tensorflow() - encoder <- function(i) list(keras::layer_dense(units = 10, - activation = "tanh"), - keras::layer_dense(units = i)) - decoder <- function() list(keras::layer_dense(units = 10, - activation = "tanh"), - keras::layer_dense(units = 4)) - - iris_data <- as(iris[, 1:4], "dimRedData") +## tensorflow::tf$compat$v1$set_random_seed(2) +## iris_data <- as(iris[, 1:4], "dimRedData") +## expect_equal(class(iris_data)[1], "dimRedData") - ae1 <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder", - keras_graph = list(encoder = encoder(x), - decoder = decoder()), - n_steps = 2)) - aq1 <- lapply(ae1, function(x) quality(x, "reconstruction_rmse")) +## ae1 <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder", +## n_hidden = c(10, x, 10), +## ndim = x, n_steps = 1)) +## aq1 <- lapply(ae1, function(x) quality(x, "reconstruction_rmse")) - ae2 <- lapply(ae1, function(x) embed(iris_data, "AutoEncoder", - autoencoder = x)) - aq2 <- lapply(ae2, function(x) quality(x, "reconstruction_rmse")) +## ae2 <- lapply(ae1, function(x) embed(iris_data, "AutoEncoder", +## autoencoder = x, n_steps = 1000)) +## aq2 <- lapply(ae2, function(x) quality(x, "reconstruction_rmse")) - lapply(ae1, function(x) expect_s4_class(x, "dimRedResult")) - lapply(ae2, function(x) expect_s4_class(x, "dimRedResult")) - - ## expect(aq1[[1]] > aq2[[1]], "the error should decrease with more steps") - ## expect(aq1[[2]] > aq2[[2]], "the error should decrease with more steps") - ## expect(aq1[[3]] > aq2[[3]], "the error should decrease with more steps") - ## expect(aq1[[4]] > aq2[[4]], "the error should decrease with more steps") - - lapply(1:length(ae1), function(x) expect_equal(x, getNDim(ae1[[x]]))) - lapply(1:length(ae2), function(x) expect_equal(x, getNDim(ae2[[x]]))) - -}) +## lapply(ae1, function(x) expect_s4_class(x, "dimRedResult")) +## 
lapply(ae2, function(x) expect_s4_class(x, "dimRedResult")) +## expect(aq1[[1]] > aq2[[1]], "the error should decrease with more steps") +## expect(aq1[[2]] > aq2[[2]], "the error should decrease with more steps") +## ## expect(aq1[[3]] > aq2[[3]], "the error should decrease with more steps") +## ## expect(aq1[[4]] > aq2[[4]], "the error should decrease with more steps") +## lapply(1:length(ae1), function(x) expect_equal(x, getNDim(ae1[[x]]))) +## lapply(1:length(ae2), function(x) expect_equal(x, getNDim(ae2[[x]]))) +## }) -## test_that("garbage collection", { +## test_that("using autoencoder with keras", { ## skip_if_no_tensorflow() -## tmp <- tf$get_session_handle(environment(ae[[1]]@apply)$dec) +## skip_if_no_keras() + +## encoder <- function(i) list(keras::layer_dense(units = 10, +## activation = "tanh"), +## keras::layer_dense(units = i)) +## decoder <- function() list(keras::layer_dense(units = 10, +## activation = "tanh"), +## keras::layer_dense(units = 4)) + +## iris_data <- as(iris[, 1:4], "dimRedData") -## tmp <- tf$get_default_session() +## ae1 <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder", +## keras_graph = list(encoder = encoder(x), +## decoder = decoder()), +## n_steps = 2)) +## aq1 <- lapply(ae1, function(x) quality(x, "reconstruction_rmse")) + +## ae2 <- lapply(ae1, function(x) embed(iris_data, "AutoEncoder", +## autoencoder = x)) +## aq2 <- lapply(ae2, function(x) quality(x, "reconstruction_rmse")) + +## lapply(ae1, function(x) expect_s4_class(x, "dimRedResult")) +## lapply(ae2, function(x) expect_s4_class(x, "dimRedResult")) + +## ## expect(aq1[[1]] > aq2[[1]], "the error should decrease with more steps") +## ## expect(aq1[[2]] > aq2[[2]], "the error should decrease with more steps") +## ## expect(aq1[[3]] > aq2[[3]], "the error should decrease with more steps") +## ## expect(aq1[[4]] > aq2[[4]], "the error should decrease with more steps") + +## lapply(1:length(ae1), function(x) expect_equal(x, getNDim(ae1[[x]]))) +## lapply(1:length(ae2), function(x) expect_equal(x, getNDim(ae2[[x]]))) -## tmp$close -## tmp -## tf$get_session_handle() -## tf$Session() ## }) + + + +## ## test_that("garbage collection", { +## ## skip_if_no_tensorflow() +## ## tmp <- tf$get_session_handle(environment(ae[[1]]@apply)$dec) + +## ## tmp <- tf$get_default_session() + +## ## tmp$close +## ## tmp +## ## tf$get_session_handle() +## ## tf$Session() +## ## }) diff --git a/tests/testthat/test_embed.R b/tests/testthat/test_embed.R index e04784a8..7da50176 100644 --- a/tests/testthat/test_embed.R +++ b/tests/testthat/test_embed.R @@ -4,3 +4,20 @@ test_that("standard method is PCA", { expect_equal(res@method, "PCA") }) + +test_that("correctly convert .keep.org.data argument", { + res1 <- embed(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, + iris, "PCA", .keep.org.data = FALSE) + + + res2 <- embed(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, + iris, "PCA", .keep.org.data = TRUE) + + attr(res2@org.data, "assign") <- NULL + + expect_equal( + unname(as.matrix(iris[1:4])), + unname(as.matrix(getOrgData(res2)@data)) + ) + +}) diff --git a/tests/testthat/test_fastICA.R b/tests/testthat/test_fastICA.R index 88876af1..e497fcd6 100644 --- a/tests/testthat/test_fastICA.R +++ b/tests/testthat/test_fastICA.R @@ -1,5 +1,5 @@ test_that("general data conversions", { - if(!requireNamespace("FastICA", quietly = TRUE)) + if(!requireNamespace("fastICA", quietly = TRUE)) skip("FastICA not available") irisData <- as(iris[, 1:4], "dimRedData") 
expect_equal(class(irisData)[1], "dimRedData") diff --git a/tests/testthat/test_high_level_functions.R b/tests/testthat/test_high_level_functions.R index dfa6d5eb..ddcada1c 100644 --- a/tests/testthat/test_high_level_functions.R +++ b/tests/testthat/test_high_level_functions.R @@ -2,7 +2,7 @@ test_that("high level functions working?", { embed_methods <- dimRedMethodList(filter = TRUE) quality_methods <- dimRedQualityList(filter = TRUE) scurve <- loadDataSet("3D S Curve", n = 300) - for(i in 1:ncol(scurve@data)){ + for (i in seq_len(ncol(scurve@data))){ scurve@data[, i] <- scurve@data[, i] - min(scurve@data[, i]) } diff --git a/tests/testthat/test_quality.R b/tests/testthat/test_quality.R index 9fbc3c51..d3416e62 100644 --- a/tests/testthat/test_quality.R +++ b/tests/testthat/test_quality.R @@ -1,6 +1,8 @@ test_that("quality", { irisData <- loadDataSet("Iris") + # remove duplicates + irisData <- irisData[!duplicated(irisData@data), ] parsPCA <- list(center = TRUE, scale. = TRUE) resPCA <- do.call(function(...) embed(irisData, "PCA", ...), parsPCA) @@ -53,9 +55,9 @@ test_that("rmse_by_ndim", { skip("DRR not available") } - set.seed(1) - ir <- loadDataSet("Iris") + + set.seed(1) ir.drr <- embed(ir, "DRR", .mute = c("message", "output"), ndim = ndims(ir)) ir.pca <- embed(ir, "PCA", ndim = ndims(ir)) diff --git a/vignettes/dimensionality-reduction.Rnw b/vignettes/dimensionality-reduction.Rnw index 54a8cd23..131422e5 100644 --- a/vignettes/dimensionality-reduction.Rnw +++ b/vignettes/dimensionality-reduction.Rnw @@ -1027,46 +1027,46 @@ The plots in figure~\ref{fig:plotexample} are produced by the following code: % \centering <<"pca_isomap_example",include=FALSE,fig.width=4,fig.height=4>>= if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") { -library(dimRed); library(ggplot2); #library(dplyr); library(tidyr) -## define which methods to apply -embed_methods <- c("Isomap", "PCA") -## load test data set -data_set <- loadDataSet("3D S Curve", n = 1000) -## apply dimensionality reduction -data_emb <- lapply(embed_methods, function(x) embed(data_set, x)) -names(data_emb) <- embed_methods -## plot data set, embeddings, and quality analysis -## plot(data_set, type = "3vars") -## lapply(data_emb, plot, type = "2vars") -## plot_R_NX(data_emb) - -add_label <- function(label) -grid::grid.text(label, 0.2, 1, hjust = 0, vjust = 1, -gp = grid::gpar(fontface = "bold", -cex = 1.5)) -## pdf('~/phd/text/dimRedPackage/plots/plot_example.pdf', width = 4, height = 4) -## plot the results -plot(data_set, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16) -add_label("a") -par(mar = c(4, 4, 0, 0) + 0.1, bty = "n", las = 1) -plot(data_emb$Isomap, type = "2vars", pch = 16) -add_label("b") -plot(data_emb$PCA, type = "2vars", pch = 16) -add_label("d") -## calculate quality scores -print( -plot_R_NX(data_emb) + -theme(legend.title = element_blank(), -legend.position = c(0.5, 0.1), -legend.justification = c(0.5, 0.1)) -) -add_label("c") + library(dimRed); library(ggplot2); #library(dplyr); library(tidyr) + ## define which methods to apply + embed_methods <- c("Isomap", "PCA") + ## load test data set + data_set <- loadDataSet("3D S Curve", n = 1000) + ## apply dimensionality reduction + data_emb <- lapply(embed_methods, function(x) embed(data_set, x)) + names(data_emb) <- embed_methods + ## plot data set, embeddings, and quality analysis + ## plot(data_set, type = "3vars") + ## lapply(data_emb, plot, type = "2vars") + ## plot_R_NX(data_emb) + + add_label <- function(label) + grid::grid.text(label, 
0.2, 1, hjust = 0, vjust = 1, + gp = grid::gpar(fontface = "bold", + cex = 1.5)) + ## pdf('~/phd/text/dimRedPackage/plots/plot_example.pdf', width = 4, height = 4) + ## plot the results + plot(data_set, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16) + add_label("a") + par(mar = c(4, 4, 0, 0) + 0.1, bty = "n", las = 1) + plot(data_emb$Isomap, type = "2vars", pch = 16) + add_label("b") + plot(data_emb$PCA, type = "2vars", pch = 16) + add_label("d") + ## calculate quality scores + print( + plot_R_NX(data_emb) + + theme(legend.title = element_blank(), + legend.position = c(0.5, 0.1), + legend.justification = c(0.5, 0.1)) + ) + add_label("c") } else { -# These cannot all be plot(1:10)!!! It's a mistery to me. -plot(1:10) -barplot(1:10) -hist(1:10) -plot(1:10) + # These cannot all be plot(1:10)!!! It's a mistery to me. + plot(1:10) + barplot(1:10) + hist(1:10) + plot(1:10) } @ \includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-1.pdf} @@ -1142,44 +1142,44 @@ a: \centering <>= if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") { -library(dimRed) -library(cccd) -## Load data -ss <- loadDataSet("3D S Curve", n = 500) -## Parameter space -kk <- floor(seq(5, 100, length.out = 40)) -## Embedding over parameter space -emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x)) -## Quality over embeddings -qual <- sapply(emb, function(x) quality(x, "Q_local")) -## Find best value for K -ind_max <- which.max(qual) -k_max <- kk[ind_max] - -add_label <- function(label){ - par(xpd = TRUE) - b = par("usr") - text(b[1], b[4], label, adj = c(0, 1), cex = 1.5, font = 2) - par(xpd = FALSE) -} - -names(qual) <- kk + library(dimRed) + library(cccd) + ## Load data + ss <- loadDataSet("3D S Curve", n = 500) + ## Parameter space + kk <- floor(seq(5, 100, length.out = 40)) + ## Embedding over parameter space + emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x)) + ## Quality over embeddings + qual <- sapply(emb, function(x) quality(x, "Q_local")) + ## Find best value for K + ind_max <- which.max(qual) + k_max <- kk[ind_max] + + add_label <- function(label){ + par(xpd = TRUE) + b = par("usr") + text(b[1], b[4], label, adj = c(0, 1), cex = 1.5, font = 2) + par(xpd = FALSE) + } + + names(qual) <- kk } @ <<"select_k",include=FALSE,fig.width=11,fig.height=5>>= -if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") { -par(mfrow = c(1, 2), - mar = c(5, 4, 0, 0) + 0.1, - oma = c(0, 0, 0, 0)) -plot(kk, qual, type = "l", xlab = "k", ylab = expression(Q[local]), bty = "n") -abline(v = k_max, col = "red") -add_label("a") -plot(ss, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16) -add_label("b") +if (Sys.getenv("BNET_BUILD_VIGNETTE") != "") { + par(mfrow = c(1, 2), + mar = c(5, 4, 0, 0) + 0.1, + oma = c(0, 0, 0, 0)) + plot(kk, qual, type = "l", xlab = "k", ylab = expression(Q[local]), bty = "n") + abline(v = k_max, col = "red") + add_label("a") + plot(ss, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16) + add_label("b") } else { -plot(1:10) -plot(1:10) + plot(1:10) + plot(1:10) } @ @@ -1274,13 +1274,13 @@ visualization of the matrix can be found in Figure~\ref{fig:qualityexample}. 
% \centering <<"plot_quality",include=FALSE>>= if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") { -embed_methods <- dimRedMethodList() -quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX", - "cophenetic_correlation") -iris_data <- loadDataSet("Iris") -quality_results <- matrix( - NA, length(embed_methods), length(quality_methods), - dimnames = list(embed_methods, quality_methods) + embed_methods <- dimRedMethodList() + quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX", + "cophenetic_correlation") + iris_data <- loadDataSet("Iris") + quality_results <- matrix( + NA, length(embed_methods), length(quality_methods), + dimnames = list(embed_methods, quality_methods) ) embedded_data <- list() @@ -1295,14 +1295,14 @@ quality_results <- quality_results[order(rowMeans(quality_results)), ] palette(c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e")) col_hsv <- rgb2hsv(col2rgb(palette())) ## col_hsv["v", ] <- col_hsv["v", ] * 3 / 1 -palette(hsv(col_hsv["h",], col_hsv["s",], col_hsv["v",])) +palette(hsv(col_hsv["h", ], col_hsv["s", ], col_hsv["v", ])) par(mar = c(2, 8, 0, 0) + 0.1) barplot(t(quality_results), beside = TRUE, col = 1:4, legend.text = quality_methods, horiz = TRUE, las = 1, cex.names = 0.85, args.legend = list(x = "topleft", bg = "white", cex = 0.8)) } else { -plot(1:10) + plot(1:10) } @ \includegraphics[width=.5\textwidth]{figure/plot_quality-1.pdf} @@ -1330,9 +1330,10 @@ quality_results <- matrix( embedded_data <- list() for (e in embed_methods) { -embedded_data[[e]] <- embed(scurve, e) -for (q in quality_methods) - try(quality_results[e, q] <- quality(embedded_data[[e]], q)) + embedded_data[[e]] <- embed(scurve, e) + for (q in quality_methods) { + try(quality_results[e, q] <- quality(embedded_data[[e]], q)) + } } @
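The hunk above only re-indents the vignette loop that fills `quality_results`; its behaviour is unchanged. For reference, here is a minimal standalone sketch of that pattern outside the vignette, assuming dimRed and the packages required by the chosen methods are installed; the reduced method and metric lists below are illustrative and not part of the patch.

library(dimRed)

## Illustrative subsets; the vignette iterates over dimRedMethodList() and
## four quality metrics instead.
embed_methods   <- c("PCA", "Isomap")
quality_methods <- c("Q_local", "Q_global")

scurve <- loadDataSet("3D S Curve", n = 300)

## One row per method, one column per quality metric.
quality_results <- matrix(
  NA, length(embed_methods), length(quality_methods),
  dimnames = list(embed_methods, quality_methods)
)

embedded_data <- list()
for (e in embed_methods) {
  embedded_data[[e]] <- embed(scurve, e)
  for (q in quality_methods) {
    ## try() keeps a single failing method/metric pair from aborting the loop.
    try(quality_results[e, q] <- quality(embedded_data[[e]], q))
  }
}

quality_results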