speed up slice_head() and slice_tail() (#219)

mgirlich · web-flow · commit c887a262070a · 2021-03-08T07:21:02.000-06:00
diff --git a/NEWS.md b/NEWS.md
@@ -3,7 +3,7 @@
 
-* `filter()` works for negated logical columns (@mgirlich, @211).
+* `filter()` works for negated logical columns (@mgirlich, #211).
 
-* speed up `slice_min()` and `slice_max()` after `group_by()` (@mgirlich, #216).
+* speed up `slice_*()` functions after `group_by()` (@mgirlich, #216).
 
 * `slice_max()` now works when ordering by a character column (@mgirlich, #218).
 
diff --git a/R/step-subset-slice.R b/R/step-subset-slice.R
@@ -4,9 +4,8 @@
 #'
 #' @description
 #' These are methods for the dplyr [slice()], `slice_head()`, `slice_tail()`,
-#' `slice_min()`, `slice_max()` and `slice_sample()` generics. `slice()`
-#' and `slice_sample()` are translated to the `i` argument of `[.data.table`,
-#' all others are translated to the `j` argument.
+#' `slice_min()`, `slice_max()` and `slice_sample()` generics. They are
+#' translated to the `i` argument of `[.data.table`.
 #'
 #' Unlike dplyr, `slice()` (and `slice()` alone) returns the same number of
 #' rows per group, regardless of whether or not the indices appear in each
@@ -80,11 +79,11 @@ slice.data.table <- function(.data, ...) {
 slice_head.dtplyr_step <- function(.data, ..., n, prop) {
   ellipsis::check_dots_empty()
   size <- check_slice_size(n, prop)
-  j <- switch(size$type,
-    n = expr(head(.SD, !!size$n)),
-    prop = expr(head(.SD, !!size$prop * .N)),
+  i <- switch(size$type, # seq_len(0) is empty; seq.int(0) would be c(1, 0)
+    n = expr(seq_len(min(!!size$n, .N))),
+    prop = expr(seq_len(floor(!!size$prop * .N))),
   )
-  step_subset_j(.data, j = j)
+  step_subset_i(.data, i = i)
 }
 
 #' @rdname slice.dtplyr_step
@@ -93,11 +92,11 @@ slice_head.dtplyr_step <- function(.data, ..., n, prop) {
 slice_tail.dtplyr_step <- function(.data, ..., n, prop) {
   ellipsis::check_dots_empty()
   size <- check_slice_size(n, prop)
-  j <- switch(size$type,
-    n = expr(tail(.SD, !!size$n)),
-    prop = expr(tail(.SD, floor(!!size$prop * .N))),
+  n_sequence <- switch(size$type, # whole-number count of trailing rows
+    n = expr(min(!!size$n, .N)),
+    prop = expr(floor(!!size$prop * .N)),
   )
-  step_subset_j(.data, j = j)
+  step_subset_i(.data, i = expr(seq.int(to = .N, length.out = !!n_sequence)))
 }
 
 #' @rdname slice.dtplyr_step
diff --git a/man/slice.dtplyr_step.Rd b/man/slice.dtplyr_step.Rd