#' Split a list or vector into chunks
#'
#' `list_split()` splits a list or vector into smaller chunks, providing
#' a conceptual inverse to [list_flatten()]. You can split by chunk size
#' or by providing a grouping vector.
#'
#' @param x A list or vector. To split data frames, use `is_node = is.list`.
#' @param n An integer specifying the size of each chunk. If the size
#'   of `x` is not evenly divisible by `n`, the last chunk will be smaller.
#' @param groups A vector with one value per element of `x` (per row, for
#'   a data frame) that defines how to group elements. Elements with the
#'   same group value will be placed in the same chunk. Missing values are
#'   not allowed.
#' @param is_node A predicate function that determines whether an object
#'   is a "node" (by default, a list). Set to `is.list` to enable
#'   splitting of data frames by rows. See [modify_tree()] for more details.
#' @inheritParams rlang::args_dots_empty
#' @return A list where each element contains a chunk of the original input.
#'   When splitting by `groups`, the chunks are named after the group values.
#' @export
#' @examples
#' # Split by chunk size
#' list_split(1:10, n = 3)
#'
#' # Split by grouping vector
#' list_split(letters[1:6], groups = c(1, 1, 2, 2, 3, 3))
#'
#' # Split a list
#' x <- list(a = 1, b = 2, c = 3, d = 4)
#' list_split(x, n = 2)
#'
#' # Split data frames by rows
#' df <- data.frame(x = 1:4, y = 5:8)
#' list_split(df, n = 2, is_node = is.list)
list_split <- function(x, ..., n = NULL, groups = NULL, is_node = NULL) {
  is_node <- as_is_node(is_node)
  if (!is_node(x) && !is.atomic(x)) {
    cli::cli_abort(
      "{.arg x} must be a list, vector, or other node-like object."
    )
  }

  check_dots_empty()

  # Exactly one of `n` or `groups` must be supplied.
  if (is.null(n) && is.null(groups)) {
    cli::cli_abort("Must provide either {.arg n} or {.arg groups}.")
  }
  if (!is.null(n) && !is.null(groups)) {
    cli::cli_abort("Can't provide both {.arg n} and {.arg groups}.")
  }

  if (!is.null(n)) {
    # `!is.finite()` rejects NA, NaN and Inf. `n` is deliberately left as a
    # whole-number double: coercing with as.integer() would turn very large
    # values into NA.
    n_ok <- is.numeric(n) &&
      length(n) == 1 &&
      is.finite(n) &&
      n >= 1 &&
      n == floor(n)
    if (!n_ok) {
      cli::cli_abort("{.arg n} must be a positive integer.")
    }
  }

  # vec_chop() slices along the vctrs size of `x`, which is the number of
  # rows for a data frame. length() would count columns there, so every
  # size-dependent step below uses `size`.
  size <- vctrs::vec_size(x)

  if (!is.null(groups)) {
    if (length(groups) != size) {
      # Both bullets must live in one character vector; a separate named
      # argument to cli_abort() would not be rendered as part of the message.
      cli::cli_abort(c(
        "{.arg groups} must have the same length as {.arg x} ({size}).",
        "i" = "{.arg groups} has length {length(groups)}."
      ))
    }
    # split() drops positions whose group is NA, which would silently lose
    # elements of `x`.
    if (anyNA(groups)) {
      cli::cli_abort("{.arg groups} can't contain missing values.")
    }
  }

  indices <- generate_split_indices(x, n, groups, size = size)
  result <- vec_chop(x, indices = indices)

  # Chunks produced by `groups` carry the group values as names; chunks
  # produced by `n` stay unnamed.
  if (!is.null(groups) && !is.null(names(indices))) {
    names(result) <- names(indices)
  }

  result
}

# Build the list of integer index vectors that defines each chunk.
#
# `x` is only used to derive the default `size`; `n` is the chunk size and
# `groups` the grouping vector (exactly one is expected to be non-NULL).
# Returns `list()` for empty input, otherwise the named list produced by
# `split()`.
generate_split_indices <- function(x,
                                   n = NULL,
                                   groups = NULL,
                                   size = vctrs::vec_size(x)) {
  if (size == 0) {
    return(list())
  }

  positions <- seq_len(size)
  if (!is.null(n)) {
    # Positions 1..n map to chunk 1, n+1..2n to chunk 2, and so on.
    chunk_ids <- ceiling(positions / n)
  } else {
    chunk_ids <- groups
  }

  split(positions, chunk_ids)
}
+list_split(x, ..., n = NULL, groups = NULL, is_node = NULL) +} +\arguments{ +\item{x}{A list or vector. To split data frames, use \code{is_node = is.list}.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{n}{An integer specifying the size of each chunk. If the length +of \code{x} is not evenly divisible by \code{n}, the last chunk will be smaller.} + +\item{groups}{A vector the same length as \code{x} that defines how to +group elements. Elements with the same group value will be placed +in the same chunk.} + +\item{is_node}{A predicate function that determines whether an object +is a "node" (by default, a list). Set to \code{is.list} to enable +splitting of data frames by rows. See \code{\link[=modify_tree]{modify_tree()}} for more details.} +} +\value{ +A list where each element contains a chunk of the original input. +} +\description{ +\code{list_split()} splits a list or vector into smaller chunks, providing +a conceptual inverse to \code{\link[=list_flatten]{list_flatten()}}. You can split by chunk size +or by providing a grouping vector. +} +\examples{ +# Split by chunk size +list_split(1:10, n = 3) + +# Split by grouping vector +list_split(letters[1:6], groups = c(1, 1, 2, 2, 3, 3)) + +# Split a list +x <- list(a = 1, b = 2, c = 3, d = 4) +list_split(x, n = 2) + +# Split data frames by rows +df <- data.frame(x = 1:4, y = 5:8) +list_split(df, n = 2, is_node = is.list) +} diff --git a/tests/testthat/_snaps/list-split.md b/tests/testthat/_snaps/list-split.md new file mode 100644 index 00000000..ef87f243 --- /dev/null +++ b/tests/testthat/_snaps/list-split.md @@ -0,0 +1,90 @@ +# n validation works + + Code + list_split(1:5, n = 0) + Condition + Error in `list_split()`: + ! `n` must be a positive integer. + +--- + + Code + list_split(1:5, n = -1) + Condition + Error in `list_split()`: + ! `n` must be a positive integer. + +--- + + Code + list_split(1:5, n = 2.5) + Condition + Error in `list_split()`: + ! 
`n` must be a positive integer. + +--- + + Code + list_split(1:5, n = c(1, 2)) + Condition + Error in `list_split()`: + ! `n` must be a positive integer. + +--- + + Code + list_split(1:5, n = NA) + Condition + Error in `list_split()`: + ! `n` must be a positive integer. + +# groups validation works + + Code + list_split(1:5, groups = 1:3) + Condition + Error in `list_split()`: + ! `groups` must have the same length as `x` (5). + +--- + + Code + list_split(1:5, groups = character()) + Condition + Error in `list_split()`: + ! `groups` must have the same length as `x` (5). + +# parameter validation works + + Code + list_split(1:5) + Condition + Error in `list_split()`: + ! Must provide either `n` or `groups`. + +--- + + Code + list_split(1:5, n = 2, groups = c(1, 1, 2, 2, 3)) + Condition + Error in `list_split()`: + ! Can't provide both `n` and `groups`. + +--- + + Code + list_split("not a list or vector") + Condition + Error in `list_split()`: + ! Must provide either `n` or `groups`. + +# dots validation works + + Code + list_split(1:5, n = 2, extra_arg = "bad") + Condition + Error in `list_split()`: + ! `...` must be empty. 
# Happy paths ------------------------------------------------------------

test_that("splitting by chunk size works", {
  # Evenly divisible input.
  even_chunks <- list_split(1:6, n = 2)
  expect_equal(even_chunks, list(1:2, 3:4, 5:6))

  # The last chunk is shorter when the length is not a multiple of `n`.
  ragged_chunks <- list_split(1:10, n = 3)
  expect_equal(ragged_chunks, list(1:3, 4:6, 7:9, 10L))

  letter_chunks <- list_split(letters[1:4], n = 2)
  expect_equal(letter_chunks, list(c("a", "b"), c("c", "d")))
})

test_that("splitting by groups works", {
  by_number <- list_split(1:6, groups = c(1, 1, 2, 2, 3, 3))
  expect_equal(by_number, list(`1` = 1:2, `2` = 3:4, `3` = 5:6))

  by_label <- list_split(letters[1:4], groups = c("x", "y", "x", "y"))
  expect_equal(by_label, list(x = c("a", "c"), y = c("b", "d")))
})

test_that("splitting lists preserves structure", {
  input <- list(a = 1, b = 2, c = 3, d = 4)
  chunks <- list_split(input, n = 2)

  expect_equal(chunks, list(list(a = 1, b = 2), list(c = 3, d = 4)))
  expect_true(is.list(chunks[[1]]))
  expect_true(is.list(chunks[[2]]))
})

# Edge cases ---------------------------------------------------------------

test_that("empty inputs work", {
  empty <- list()
  expect_equal(list_split(integer(), n = 2), empty)
  expect_equal(list_split(list(), n = 1), empty)
  expect_equal(list_split(integer(), groups = integer()), empty)
})

test_that("single element inputs work", {
  expect_equal(list_split(1L, n = 1), list(1L))
  expect_equal(list_split(list(x = 1), n = 1), list(list(x = 1)))
  expect_equal(list_split(5L, groups = "a"), list(a = 5L))
})

# Input validation ---------------------------------------------------------
# Each expect_snapshot() call below records one entry in the snapshot file.

test_that("n validation works", {
  expect_snapshot(
    list_split(1:5, n = 0),
    error = TRUE
  )
  expect_snapshot(
    list_split(1:5, n = -1),
    error = TRUE
  )
  expect_snapshot(
    list_split(1:5, n = 2.5),
    error = TRUE
  )
  expect_snapshot(
    list_split(1:5, n = c(1, 2)),
    error = TRUE
  )
  expect_snapshot(
    list_split(1:5, n = NA),
    error = TRUE
  )
})

test_that("groups validation works", {
  expect_snapshot(
    list_split(1:5, groups = 1:3),
    error = TRUE
  )
  expect_snapshot(
    list_split(1:5, groups = character()),
    error = TRUE
  )
})

test_that("parameter validation works", {
  expect_snapshot(
    list_split(1:5),
    error = TRUE
  )
  expect_snapshot(
    list_split(1:5, n = 2, groups = c(1, 1, 2, 2, 3)),
    error = TRUE
  )
  expect_snapshot(
    list_split("not a list or vector"),
    error = TRUE
  )
})

test_that("dots validation works", {
  expect_snapshot(
    list_split(1:5, n = 2, extra_arg = "bad"),
    error = TRUE
  )
})

# Coercion and node handling -----------------------------------------------

test_that("chunk size accepts numeric that converts to integer", {
  reference <- list_split(1:4, n = 2L)
  expect_equal(list_split(1:4, n = 2), reference)
  expect_equal(list_split(1:4, n = 2.0), reference)
})

test_that("data frames require is_node = is.list", {
  tbl <- data.frame(x = 1:2, y = 3:4)

  # Without is_node = is.list the call is expected to fail.
  expect_error(list_split(tbl, n = 1), "must be a list, vector")

  # With is_node = is.list the data frame is split by rows: 2 rows, 2 chunks.
  row_chunks <- list_split(tbl, n = 1, is_node = is.list)
  expect_length(row_chunks, 2)
  expect_equal(row_chunks[[1]], data.frame(x = 1L, y = 3L))
  expect_equal(row_chunks[[2]], data.frame(x = 2L, y = 4L))
})