From a5d68f303ba3498a0c528a25f275f96dbef2c94e Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Sat, 16 Aug 2025 11:36:24 +0000
Subject: [PATCH 1/8] added a warning

---
 NEWS.md               | 15 +++++++++++++++
 R/data.table.R        | 10 ++++++++++
 inst/tests/tests.Rraw |  9 +++++++++
 man/data.table.Rd     |  4 ++--
 4 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index fc69449438..b14c50454d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -107,6 +107,21 @@
 
 18. `fwrite` now allows `dec` to be the same as `sep` for edge cases where only one will be written, e.g. 0-row or 1-column tables. [#7227](https://github.com/Rdatatable/data.table/issues/7227). Thanks @MichaelChirico for the report and @venom1204 for the fix.
 
+22. Using `by=` or `keyby=` with a simple numeric or character vector in `j` (e.g. `DT[, 1:2, by=grp]`) used to silently ignore the grouping argument. This now issues a warning to alert the user that grouping is not applied in this syntax and guides them to use the `.SD` idiom instead. [#5397](https://github.com/Rdatatable/data.table/issues/5397). Thanks to @mcol for the report and @venom1204 for the fix.
+
+    ```r
+    DT = data.table(a=1:4, grp=c(1,1,2,2))
+    DT[, 1, by = grp]
+    #      a
+    #  <int>
+    # 1:     1
+    # 2:     2
+    # 3:     3
+    # 4:     4
+    # Warning message:
+    # `by` or `keyby` is ignored when `j` is a numeric vector...
+    ```
+
 ### NOTES
 
 1. The following in-progress deprecations have proceeded:
diff --git a/R/data.table.R b/R/data.table.R
index c64c391b33..b7654b4204 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -739,6 +739,12 @@ replace_dot_alias = function(e) {
       if (!length(j) && !notj) return( null.data.table() )
       if (is.factor(j)) j = as.character(j)  # fix for FR: #358
       if (is.character(j)) {
+        if (!missingby && (missing(with) || isTRUE(with))) {
+            warning(
+                "`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
+                "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
+            )
+        }
         if (notj) {
           if (anyNA(idx <- chmatch(j, names_x)))
             warningf(ngettext(sum(is.na(idx)), "column not removed because not found: %s", "columns not removed because not found: %s"),
@@ -762,6 +768,10 @@ replace_dot_alias = function(e) {
         # else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977)
         #   in both cases leave to the R-level subsetting of i and x together further below
       } else if (is.numeric(j)) {
+        if (!missingby) {
+            warning(
+                "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ", "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]")
+        }
         j = as.integer(j)
         if (any(w <- (j>ncol(x)))) stopf("Item %d of j is %d which is outside the column number range [1,ncol=%d]", idx <- which.first(w), j[idx], ncol(x))
         j = j[j!=0L]
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index d7ffc476bb..ac365e07f8 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21620,3 +21620,12 @@ local({
   test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
 })
 
+# 5397 - keyby/key ignored if numeric indices used in j
+DT = data.table(a=1:4, b=5:8, g=c(1,1,2,2))
+test(2339.1, DT[, 1:2, by=g], DT[, 1:2], warning="`by` or `keyby` is ignored")
+test(2339.2, DT[, 2:1, keyby=g], DT[, 2:1], warning="`by` or `keyby` is ignored")
+test(2339.3, DT[, c("b", "a"), by=g, with=FALSE], DT[, c("b", "a")])
+expected_sd = data.table(g=c(1,1,2,2), a=1:4, b=5:8)
+test(2339.4, DT[, .SD[, 1:2], by=g], expected_sd)
+expected_single_int = data.table(g=c(1,2), V1=c(1,1))
+test(2339.5, DT[, 1, by=g], expected_single_int)
\ No newline at end of file
diff --git a/man/data.table.Rd b/man/data.table.Rd
index 658c57234f..8f15f5ce79 100644
--- a/man/data.table.Rd
+++ b/man/data.table.Rd
@@ -97,8 +97,8 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
 
     See \href{../doc/datatable-intro.html}{\code{vignette("datatable-intro")}} and \code{example(data.table)}.}
 
-    \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. \code{by} accepts:
-
+    \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). \emph{Note that `by` and `keyby` are ignored when `j` is a character or numeric vector used for selecting columns (i.e., when the internal `with=FALSE` is triggered).} The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. \code{by} accepts:
+    
     \itemize{
         \item A single unquoted column name: e.g., \code{DT[, .(sa=sum(a)), by=x]}
 

From ea5458b35fd1d523ede80c2b978d554841e4823d Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Sat, 16 Aug 2025 11:39:44 +0000
Subject: [PATCH 2/8] ..

---
 R/data.table.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index b7654b4204..1615519fa9 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -768,7 +768,7 @@ replace_dot_alias = function(e) {
         # else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977)
         #   in both cases leave to the R-level subsetting of i and x together further below
       } else if (is.numeric(j)) {
-        if (!missingby) {
+        if (!missingby && (missing(with) || isTRUE(with))) {
             warning(
                 "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ", "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]")
         }

From 87b375f327d0f290818b9af17a84a1c2f5ea9a1d Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Sat, 16 Aug 2025 17:32:42 +0000
Subject: [PATCH 3/8] corrected

---
 NEWS.md               |  2 +-
 R/data.table.R        | 18 ++++++++++--------
 inst/tests/tests.Rraw |  2 +-
 man/data.table.Rd     |  2 +-
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b14c50454d..d3be30ec1b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -107,7 +107,7 @@
 
 18. `fwrite` now allows `dec` to be the same as `sep` for edge cases where only one will be written, e.g. 0-row or 1-column tables. [#7227](https://github.com/Rdatatable/data.table/issues/7227). Thanks @MichaelChirico for the report and @venom1204 for the fix.
 
-22. Using `by=` or `keyby=` with a simple numeric or character vector in `j` (e.g. `DT[, 1:2, by=grp]`) used to silently ignore the grouping argument. This now issues a warning to alert the user that grouping is not applied in this syntax and guides them to use the `.SD` idiom instead. [#5397](https://github.com/Rdatatable/data.table/issues/5397). Thanks to @mcol for the report and @venom1204 for the fix.
+19. Using `by=` or `keyby=` with a simple numeric or character vector in `j` (e.g. `DT[, 1:2, by=grp]`) used to silently ignore the grouping argument. This now issues a warning to alert the user that grouping is not applied in this syntax and guides them to use the `.SD` idiom instead. [#5397](https://github.com/Rdatatable/data.table/issues/5397). Thanks to @mcol for the report and @venom1204 for the fix.
 
     ```r
     DT = data.table(a=1:4, grp=c(1,1,2,2))
diff --git a/R/data.table.R b/R/data.table.R
index 1615519fa9..7f78ae6b8b 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -739,11 +739,11 @@ replace_dot_alias = function(e) {
       if (!length(j) && !notj) return( null.data.table() )
       if (is.factor(j)) j = as.character(j)  # fix for FR: #358
       if (is.character(j)) {
-        if (!missingby && (missing(with) || isTRUE(with))) {
-            warning(
-                "`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
-                "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
-            )
+        if (!missingby) {
+        warning(
+            "`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
+            "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
+        )
         }
         if (notj) {
           if (anyNA(idx <- chmatch(j, names_x)))
@@ -768,9 +768,11 @@ replace_dot_alias = function(e) {
         # else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977)
         #   in both cases leave to the R-level subsetting of i and x together further below
       } else if (is.numeric(j)) {
-        if (!missingby && (missing(with) || isTRUE(with))) {
-            warning(
-                "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ", "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]")
+        if (!missingby) {
+        warning(
+            "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ",
+            "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
+        )
         }
         j = as.integer(j)
         if (any(w <- (j>ncol(x)))) stopf("Item %d of j is %d which is outside the column number range [1,ncol=%d]", idx <- which.first(w), j[idx], ncol(x))
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ac365e07f8..f3a8f60095 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21624,7 +21624,7 @@ local({
 DT = data.table(a=1:4, b=5:8, g=c(1,1,2,2))
 test(2339.1, DT[, 1:2, by=g], DT[, 1:2], warning="`by` or `keyby` is ignored")
 test(2339.2, DT[, 2:1, keyby=g], DT[, 2:1], warning="`by` or `keyby` is ignored")
-test(2339.3, DT[, c("b", "a"), by=g, with=FALSE], DT[, c("b", "a")])
+test(2339.3, DT[, c("b", "a"), by=g, with=FALSE], DT[, c("b", "a")], warning="`by` or `keyby` is ignored")
 expected_sd = data.table(g=c(1,1,2,2), a=1:4, b=5:8)
 test(2339.4, DT[, .SD[, 1:2], by=g], expected_sd)
 expected_single_int = data.table(g=c(1,2), V1=c(1,1))
diff --git a/man/data.table.Rd b/man/data.table.Rd
index 8f15f5ce79..f583b5439e 100644
--- a/man/data.table.Rd
+++ b/man/data.table.Rd
@@ -97,7 +97,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
 
     See \href{../doc/datatable-intro.html}{\code{vignette("datatable-intro")}} and \code{example(data.table)}.}
 
-    \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). \emph{Note that `by` and `keyby` are ignored when `j` is a character or numeric vector used for selecting columns (i.e., when the internal `with=FALSE` is triggered).} The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. \code{by} accepts:
+    \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). \emph{Note that \code{by} and \code{keyby} are ignored when \code{j} is a character or numeric vector used for selecting columns (i.e., when the internal \code{with=FALSE} is triggered).} The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. ...}    
     
     \itemize{
         \item A single unquoted column name: e.g., \code{DT[, .(sa=sum(a)), by=x]}

From 7b845330badde941a32038b151ec396afcc6c5cc Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Sat, 16 Aug 2025 18:23:46 +0000
Subject: [PATCH 4/8] lintr

---
 R/data.table.R    | 9 +++++----
 man/data.table.Rd | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 7f78ae6b8b..2f789b7adf 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -741,8 +741,9 @@ replace_dot_alias = function(e) {
       if (is.character(j)) {
         if (!missingby) {
         warning(
-            "`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
-            "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
+          "`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
+          "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]",
+          call. = FALSE
         )
         }
         if (notj) {
@@ -770,8 +771,8 @@ replace_dot_alias = function(e) {
       } else if (is.numeric(j)) {
         if (!missingby) {
         warning(
-            "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ",
-            "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
+          "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ", "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]",
+          call. = FALSE
         )
         }
         j = as.integer(j)
diff --git a/man/data.table.Rd b/man/data.table.Rd
index f583b5439e..d2dccead3e 100644
--- a/man/data.table.Rd
+++ b/man/data.table.Rd
@@ -97,8 +97,8 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
 
     See \href{../doc/datatable-intro.html}{\code{vignette("datatable-intro")}} and \code{example(data.table)}.}
 
-    \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). \emph{Note that \code{by} and \code{keyby} are ignored when \code{j} is a character or numeric vector used for selecting columns (i.e., when the internal \code{with=FALSE} is triggered).} The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. ...}    
-    
+    \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). \emph{Note that \code{by} and \code{keyby} are ignored when \code{j} is a character or numeric vector used for selecting columns (i.e., when the internal \code{with=FALSE} is triggered).} The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. \code{by} accepts:    
+
     \itemize{
         \item A single unquoted column name: e.g., \code{DT[, .(sa=sum(a)), by=x]}
 

From 242847aa96971d23d9ed14de0aa49742d0a07536 Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Wed, 20 Aug 2025 11:06:07 +0000
Subject: [PATCH 5/8] restructure

---
 R/data.table.R | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 2f789b7adf..bbc4042ffe 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -735,17 +735,22 @@ replace_dot_alias = function(e) {
         names(..syms) = ..syms
         j = eval(jsub, lapply(substr(..syms, 3L, nchar(..syms)), get, pos=parent.frame()), parent.frame())
       }
-      if (is.logical(j)) j = which(j)
       if (!length(j) && !notj) return( null.data.table() )
       if (is.factor(j)) j = as.character(j)  # fix for FR: #358
+      if (is.character(j) || (is.numeric(j) && !is.logical(j))) {
+          if (!missingby) {
+              j_type = if (is.character(j)) "a character" else "a numeric"
+              warning(
+                  "`by` or `keyby` is ignored when `j` is ", j_type, " vector used for column selection. ",
+                  "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]",
+                  call. = FALSE
+              )
+          }
+      }
+
+      if (is.logical(j)) j = which(j)
+
       if (is.character(j)) {
-        if (!missingby) {
-        warning(
-          "`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
-          "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]",
-          call. = FALSE
-        )
-        }
         if (notj) {
           if (anyNA(idx <- chmatch(j, names_x)))
             warningf(ngettext(sum(is.na(idx)), "column not removed because not found: %s", "columns not removed because not found: %s"),
@@ -769,12 +774,6 @@ replace_dot_alias = function(e) {
         # else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977)
         #   in both cases leave to the R-level subsetting of i and x together further below
       } else if (is.numeric(j)) {
-        if (!missingby) {
-        warning(
-          "`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ", "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]",
-          call. = FALSE
-        )
-        }
         j = as.integer(j)
         if (any(w <- (j>ncol(x)))) stopf("Item %d of j is %d which is outside the column number range [1,ncol=%d]", idx <- which.first(w), j[idx], ncol(x))
         j = j[j!=0L]

From 418a74e11a8011b5ad2b8a9ace249bf6f912c77e Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Wed, 20 Aug 2025 20:35:29 +0000
Subject: [PATCH 6/8] added backticks

---
 R/data.table.R | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index bbc4042ffe..9b1cbcf1bd 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -740,11 +740,8 @@ replace_dot_alias = function(e) {
       if (is.character(j) || (is.numeric(j) && !is.logical(j))) {
           if (!missingby) {
               j_type = if (is.character(j)) "a character" else "a numeric"
-              warning(
-                  "`by` or `keyby` is ignored when `j` is ", j_type, " vector used for column selection. ",
-                  "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]",
-                  call. = FALSE
-              )
+              fmt = "`by` or `keyby` is ignored when `j` is %s vector used for column selection. Perhaps you intended to use `.SD`? For example: DT[, .SD[, %s], by = ...]"
+              warning(sprintf(fmt, j_type, gsub("%", "%%", deparse(jsub))), call. = FALSE)
           }
       }
 

From c2d84edb3c3bb3c135fe75f0b9efb134d3a2ec3e Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Wed, 20 Aug 2025 20:36:45 +0000
Subject: [PATCH 7/8] ..

---
 R/data.table.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index 9b1cbcf1bd..0832f21a01 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -741,7 +741,7 @@ replace_dot_alias = function(e) {
           if (!missingby) {
               j_type = if (is.character(j)) "a character" else "a numeric"
               fmt = "`by` or `keyby` is ignored when `j` is %s vector used for column selection. Perhaps you intended to use `.SD`? For example: DT[, .SD[, %s], by = ...]"
-              warning(sprintf(fmt, j_type, gsub("%", "%%", deparse(jsub))), call. = FALSE)
+              warningf(sprintf(fmt, j_type, gsub("%", "%%", deparse(jsub))), call. = FALSE)
           }
       }
 

From b150b0a5ffc6865fd2f29a954ef8aec023d38808 Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Wed, 20 Aug 2025 20:45:32 +0000
Subject: [PATCH 8/8] warningf

---
 R/data.table.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 0832f21a01..f60ade1ca0 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -740,8 +740,10 @@ replace_dot_alias = function(e) {
       if (is.character(j) || (is.numeric(j) && !is.logical(j))) {
           if (!missingby) {
               j_type = if (is.character(j)) "a character" else "a numeric"
-              fmt = "`by` or `keyby` is ignored when `j` is %s vector used for column selection. Perhaps you intended to use `.SD`? For example: DT[, .SD[, %s], by = ...]"
-              warningf(sprintf(fmt, j_type, gsub("%", "%%", deparse(jsub))), call. = FALSE)
+              warningf(
+                  "`by` or `keyby` is ignored when `j` is %s vector used for column selection. Perhaps you intended to use `.SD`? For example: DT[, .SD[, %s], by = ...]",j_type,
+                  gsub("%", "%%", deparse(jsub))
+              )
           }
       }