Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ export(list_merge)
export(list_modify)
export(list_rbind)
export(list_simplify)
export(list_split)
export(list_transpose)
export(lmap)
export(lmap_at)
Expand Down
103 changes: 103 additions & 0 deletions R/list-split.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#' Split a list or vector into chunks
#'
#' `list_split()` splits a list or vector into smaller chunks, providing
#' a conceptual inverse to [list_flatten()]. You can split by chunk size
#' or by providing a grouping vector; exactly one of `n` and `groups` must
#' be supplied.
#'
#' @param x A list or vector. To split data frames, use `is_node = is.list`.
#' @param n A single positive whole number giving the size of each chunk.
#'   If the size of `x` is not evenly divisible by `n`, the last chunk will
#'   be smaller.
#' @param groups A vector with one value per element of `x` (one per row
#'   when `x` is a data frame) that defines how to group elements. Elements
#'   with the same group value will be placed in the same chunk. Grouping is
#'   done with [base::split()], so elements whose group is `NA` are dropped.
#' @param is_node A predicate function that determines whether an object
#'   is a "node" (by default, a list). Set to `is.list` to enable
#'   splitting of data frames by rows. See [modify_tree()] for more details.
#' @inheritParams rlang::args_dots_empty
#' @return A list where each element contains a chunk of the original input.
#'   When splitting by `groups`, the chunks are named after the group values.
#' @export
#' @examples
#' # Split by chunk size
#' list_split(1:10, n = 3)
#'
#' # Split by grouping vector
#' list_split(letters[1:6], groups = c(1, 1, 2, 2, 3, 3))
#'
#' # Split a list
#' x <- list(a = 1, b = 2, c = 3, d = 4)
#' list_split(x, n = 2)
#'
#' # Split data frames by rows
#' df <- data.frame(x = 1:4, y = 5:8)
#' list_split(df, n = 2, is_node = is.list)
list_split <- function(x, ..., n = NULL, groups = NULL, is_node = NULL) {
  is_node <- as_is_node(is_node)
  if (!is_node(x) && !is.atomic(x)) {
    cli::cli_abort(
      "{.arg x} must be a list, vector, or other node-like object."
    )
  }

  check_dots_empty()

  # Exactly one of `n` or `groups` must be provided.
  if (is.null(n) && is.null(groups)) {
    cli::cli_abort("Must provide either {.arg n} or {.arg groups}.")
  }

  if (!is.null(n) && !is.null(groups)) {
    cli::cli_abort("Can't provide both {.arg n} and {.arg groups}.")
  }

  # `vec_chop()` slices along the vctrs size of `x`, which is the number of
  # rows for a data frame. `length()` would report the number of columns
  # there, so every size computation goes through `vec_size()` instead.
  size <- vctrs::vec_size(x)

  if (!is.null(n)) {
    # `is.finite()` rejects NA, NaN and +/-Inf in one go; an infinite chunk
    # size would otherwise turn into `NA` when coerced to integer.
    is_valid_n <- is.numeric(n) &&
      length(n) == 1 &&
      is.finite(n) &&
      n > 0 &&
      n == floor(n)
    if (!is_valid_n) {
      cli::cli_abort("{.arg n} must be a positive integer.")
    }
    # Clamp before coercing: doubles above the integer range would become
    # `NA_integer_`, and any chunk size that large means "one chunk" anyway.
    n <- as.integer(min(n, .Machine$integer.max))
  }

  if (!is.null(groups)) {
    if (length(groups) != size) {
      # The bullets must be a single character vector; passing the "i"
      # bullet as a separate named argument would drop it from the message.
      cli::cli_abort(c(
        "{.arg groups} must have the same length as {.arg x} ({size}).",
        "i" = "{.arg groups} has length {length(groups)}."
      ))
    }
  }

  # One integer vector of positions per chunk.
  indices <- generate_split_indices(x, n, groups)

  result <- vec_chop(x, indices = indices)

  # When splitting by groups, carry the group labels over as chunk names.
  if (!is.null(groups) && !is.null(names(indices))) {
    names(result) <- names(indices)
  }

  result
}

# Build the list of position vectors used to chop `x`.
#
# When `n` is supplied, positions are cut into consecutive runs of `n`
# (the last run may be shorter); otherwise positions are grouped by the
# values of `groups` via `split()`. Positions run along the vctrs size of
# `x`, i.e. rows for data frames. Returns an empty list for empty input.
generate_split_indices <- function(x, n = NULL, groups = NULL) {
  size <- vctrs::vec_size(x)
  if (size == 0) {
    return(list())
  }

  positions <- seq_len(size)

  if (!is.null(n)) {
    # Positions 1..n get id 1, n+1..2n get id 2, and so on.
    split(positions, ceiling(positions / n))
  } else {
    split(positions, groups)
  }
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ reference:
- list_flatten
- list_modify
- list_simplify
- list_split
- list_transpose
- reduce

Expand Down
47 changes: 47 additions & 0 deletions man/list_split.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 90 additions & 0 deletions tests/testthat/_snaps/list-split.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# n validation works

Code
list_split(1:5, n = 0)
Condition
Error in `list_split()`:
! `n` must be a positive integer.

---

Code
list_split(1:5, n = -1)
Condition
Error in `list_split()`:
! `n` must be a positive integer.

---

Code
list_split(1:5, n = 2.5)
Condition
Error in `list_split()`:
! `n` must be a positive integer.

---

Code
list_split(1:5, n = c(1, 2))
Condition
Error in `list_split()`:
! `n` must be a positive integer.

---

Code
list_split(1:5, n = NA)
Condition
Error in `list_split()`:
! `n` must be a positive integer.

# groups validation works

Code
list_split(1:5, groups = 1:3)
Condition
Error in `list_split()`:
! `groups` must have the same length as `x` (5).

---

Code
list_split(1:5, groups = character())
Condition
Error in `list_split()`:
! `groups` must have the same length as `x` (5).

# parameter validation works

Code
list_split(1:5)
Condition
Error in `list_split()`:
! Must provide either `n` or `groups`.

---

Code
list_split(1:5, n = 2, groups = c(1, 1, 2, 2, 3))
Condition
Error in `list_split()`:
! Can't provide both `n` and `groups`.

---

Code
list_split("not a list or vector")
Condition
Error in `list_split()`:
! Must provide either `n` or `groups`.

# dots validation works

Code
list_split(1:5, n = 2, extra_arg = "bad")
Condition
Error in `list_split()`:
! `...` must be empty.
x Problematic argument:
* extra_arg = "bad"

78 changes: 78 additions & 0 deletions tests/testthat/test-list-split.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
test_that("splitting by chunk size works", {
  # Length is an exact multiple of the chunk size.
  even_chunks <- list_split(1:6, n = 2)
  expect_equal(even_chunks, list(1:2, 3:4, 5:6))

  # A remainder ends up in a shorter final chunk.
  ragged_chunks <- list_split(1:10, n = 3)
  expect_equal(ragged_chunks, list(1:3, 4:6, 7:9, 10L))

  # Character vectors are chunked the same way.
  letter_chunks <- list_split(letters[1:4], n = 2)
  expect_equal(letter_chunks, list(c("a", "b"), c("c", "d")))
})

test_that("splitting by groups works", {
  # Numeric group values become the (character) names of the chunks.
  by_number <- list_split(1:6, groups = c(1, 1, 2, 2, 3, 3))
  expect_equal(by_number, list("1" = 1:2, "2" = 3:4, "3" = 5:6))

  # Interleaved character groups gather non-adjacent elements.
  by_label <- list_split(letters[1:4], groups = c("x", "y", "x", "y"))
  expect_equal(by_label, list("x" = c("a", "c"), "y" = c("b", "d")))
})

test_that("splitting lists preserves structure", {
  input <- list(a = 1, b = 2, c = 3, d = 4)
  chunks <- list_split(input, n = 2)

  # Each chunk keeps the element names of the slice it came from.
  expect_equal(chunks, list(list(a = 1, b = 2), list(c = 3, d = 4)))

  # Chunks of a list are themselves lists.
  first_chunk <- chunks[[1]]
  second_chunk <- chunks[[2]]
  expect_true(is.list(first_chunk))
  expect_true(is.list(second_chunk))
})

test_that("empty inputs work", {
  # Whatever the splitting mode, empty input yields an empty list.
  no_chunks <- list()

  expect_equal(list_split(integer(), n = 2), no_chunks)
  expect_equal(list_split(list(), n = 1), no_chunks)
  expect_equal(list_split(integer(), groups = integer()), no_chunks)
})

test_that("single element inputs work", {
  # Atomic vector of length one.
  one_int <- list_split(1L, n = 1)
  expect_equal(one_int, list(1L))

  # List of length one; the element name is kept inside the chunk.
  one_item <- list_split(list(x = 1), n = 1)
  expect_equal(one_item, list(list(x = 1)))

  # A single group names the single chunk.
  one_group <- list_split(5L, groups = "a")
  expect_equal(one_group, list("a" = 5L))
})

# Each call below must fail with the "`n` must be a positive integer." error;
# the exact output is pinned in the snapshot file.
test_that("n validation works", {
# zero
expect_snapshot(list_split(1:5, n = 0), error = TRUE)
# negative
expect_snapshot(list_split(1:5, n = -1), error = TRUE)
# not a whole number
expect_snapshot(list_split(1:5, n = 2.5), error = TRUE)
# more than one value
expect_snapshot(list_split(1:5, n = c(1, 2)), error = TRUE)
# missing value (a bare `NA` is logical, so it is also not numeric)
expect_snapshot(list_split(1:5, n = NA), error = TRUE)
})

# `groups` whose length differs from that of `x` must be rejected.
test_that("groups validation works", {
# shorter than `x`
expect_snapshot(list_split(1:5, groups = 1:3), error = TRUE)
# zero-length `groups` with non-empty `x`
expect_snapshot(list_split(1:5, groups = character()), error = TRUE)
})

# Checks the errors raised for invalid combinations of `n` and `groups`.
test_that("parameter validation works", {
# neither `n` nor `groups` supplied
expect_snapshot(list_split(1:5), error = TRUE)
# both `n` and `groups` supplied
expect_snapshot(
list_split(1:5, n = 2, groups = c(1, 1, 2, 2, 3)),
error = TRUE
)
# NOTE(review): a character vector is atomic, so this call passes the `x`
# type check and is rejected for the missing `n`/`groups` instead (the
# recorded snapshot shows "Must provide either `n` or `groups`."). It does
# not exercise the `x` validation that the string literal suggests.
expect_snapshot(list_split("not a list or vector"), error = TRUE)
})

# An unexpected extra argument captured by `...` must raise the
# "`...` must be empty" error.
test_that("dots validation works", {
expect_snapshot(list_split(1:5, n = 2, extra_arg = "bad"), error = TRUE)
})

test_that("chunk size accepts numeric that converts to integer", {
  # Reference result obtained with a true integer chunk size.
  with_integer_n <- list_split(1:4, n = 2L)

  # Whole-number doubles must behave exactly like the integer.
  expect_equal(list_split(1:4, n = 2), with_integer_n)
  expect_equal(list_split(1:4, n = 2.0), with_integer_n)
})

test_that("data frames require is_node = is.list", {
  # NOTE(review): this fixture has as many rows as columns (2 x 2), so it
  # cannot tell row-based sizing apart from column-based sizing; a
  # non-square data frame would make this a stronger check.
  df <- data.frame(x = 1:2, y = 3:4)

  # With the default `is_node`, a data frame is rejected.
  expect_error(list_split(df, n = 1), "must be a list, vector")

  # With `is_node = is.list`, the data frame is split into one chunk per row.
  rows <- list_split(df, n = 1, is_node = is.list)
  expect_length(rows, 2)

  first_row <- data.frame(x = 1L, y = 3L)
  second_row <- data.frame(x = 2L, y = 4L)
  expect_equal(rows[[1]], first_row)
  expect_equal(rows[[2]], second_row)
})
Loading