Rdatatable · Mukulyadav2004 · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025
@@ -285,6 +285,43 @@ test.list <- atime::atime_test_list(
     expr = data.table::isoweek(x),
     Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927",   # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927)
     Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"),  # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation
+
+  # Test case adapted from https://github.com/Rdatatable/data.table/issues/4177 which is where the issue was reported.
+  # Fixed in https://github.com/Rdatatable/data.table/pull/7236
+  "fwrite(select) #4177 Nx5" = atime::atime_test(
+    setup = {
+      set.seed(1L)
+      DT = data.table(a=rnorm(N), b=rnorm(N), c=rnorm(N), d=rnorm(N), e=rnorm(N))
+      temp_file = tempfile()
+    },
+    expr = {
+      has_select = "select" %chin% names(formals(data.table::fwrite))  # the same expr is timed for both commits, so detect whether this fwrite() has 'select'
+      if (has_select) {
+        data.table::fwrite(DT, temp_file, select = c("a","b","c"))
+      } else {
+        data.table::fwrite(DT[, c("a","b","c"), with = FALSE], temp_file)  # fallback without 'select': subset first, then write
+      }
+    },
+    Slow = "66cb6d2393cef30083b444346a7600a079207806",   # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/66cb6d2393cef30083b444346a7600a079207806)
+    Fast = "1887699fe965b5aa1fb8cb16b5507b7a5cbf5c85"),   # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7236/commits) that adds select parameter
+
+  # Wide case: 1 row, N columns; select a single column
+  "fwrite(select) #4177 1xN" = atime::atime_test(
+    setup = {
+      DT = data.table(t(1:N))  # t(1:N) is a 1 x N matrix, giving one row and N columns
+      temp_file = tempfile()
+      select_idx = 1L
+    },
+    expr = {
+      has_select = "select" %chin% names(formals(data.table::fwrite))  # the same expr is timed for both commits, so detect whether this fwrite() has 'select'
+      if (has_select) {
+        data.table::fwrite(DT, temp_file, select = select_idx)
+      } else {
+        data.table::fwrite(data.table:::`[.data.table`(DT, , select_idx, with = FALSE), temp_file)  # fallback without 'select': subset first, then write
+      }
+    },
+    Slow = "66cb6d2393cef30083b444346a7600a079207806",     # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/66cb6d2393cef30083b444346a7600a079207806)
+    Fast = "1887699fe965b5aa1fb8cb16b5507b7a5cbf5c85"),     # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7236/commits) that adds select parameter
 
     tests=extra.test.list)
 # nolint end: undesirable_operator_linter.
@@ -69,6 +69,8 @@
 
 15. New function `isoyear()` has been implemented as a complement to `isoweek()`, returning the ISO 8601 year corresponding to a given date, [#7154](https://github.com/Rdatatable/data.table/issues/7154). Thanks to @ben-schwen and @MichaelChirico for the suggestion and @venom1204 for the implementation.
 
+16. `fwrite()` gains a `select` argument to write only the specified columns without first creating a temporary subset, which saves memory, [#4177](https://github.com/Rdatatable/data.table/issues/4177). For `data.table` input, this uses `.shallow()` to create a shallow copy without duplicating the data. Thanks to @artidataio for the feature request, @ColeMiller1 for suggesting the implementation, and @Mukulyadav2004 for the implementation.
+
 ### BUG FIXES
 
 1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.

@@ -14,7 +14,8 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
            bom = FALSE,
            verbose=getOption("datatable.verbose", FALSE),
            encoding = "",
-           forceDecimal = FALSE) {
+           forceDecimal = FALSE,
+           select = NULL) {
   na = as.character(na[1L]) # fix for #1725
   if (length(encoding) != 1L || !encoding %chin% c("", "UTF-8", "native")) {
     stopf("Argument 'encoding' must be '', 'UTF-8' or 'native'.")
@@ -27,6 +28,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
   buffMB = as.integer(buffMB)
   nThread = as.integer(nThread)
   compressLevel = as.integer(compressLevel)
+
   # write.csv default is 'double' so fwrite follows suit. write.table's default is 'escape'
   # validate arguments
   if (is.matrix(x)) { # coerce to data.table if input object is matrix
@@ -39,6 +41,22 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
       x = as.data.table(x)
     }
   }
+  # Restrict (and possibly reorder) the columns to write according to 'select'.
+  # 'select' may be a vector of column names or of column positions; NULL keeps every column.
+  if (!is.null(select)) {
+    if (is.numeric(select)) {
+      # Positions bypass the name lookup done by colnamesInt(), so validate them here:
+      # as.integer() would silently truncate fractional values and pass NA through, and
+      # indexing a plain list with an out-of-range position yields NULL elements rather than an error.
+      ncols = length(x)
+      if (anyNA(select) || any(select < 1L | select > ncols | select != trunc(select))) {
+        stopf("Argument 'select' must contain whole-number column positions between 1 and %d.", ncols)
+      }
+      cols = as.integer(select)
+    } else {
+      cols = colnamesInt(x, select)
+    }
+    if (is.data.table(x)) {
+      # Only build a shallow view when columns are reduced or reordered; identical() is already
+      # FALSE whenever fewer columns are selected, so no separate length comparison is needed.
+      if (!identical(cols, seq_along(x))) {
+        x = .shallow(x, cols)
+      }
+    } else {
+      x = x[cols]
+    }
+  }
+
   stopifnot(
     is.list(x),
     identical(quote,"auto") || isTRUEorFALSE(quote),

@@ -21620,3 +21620,18 @@ local({
   test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
 })
 
+# test for select parameter #4177
+# selection by name for every supported input type: data.table, data.frame, list, matrix
+DT = data.table(a=1:2, b=3:4)
+f = tempfile()
+fwrite(DT, f, select = "a")
+test(2339.1, names(fread(f)), "a")
+df = as.data.frame(DT)
+fwrite(df, f, select = "a")
+test(2339.2, names(fread(f)), "a")
+l = as.list(DT)
+fwrite(l, f, select = "a")
+test(2339.3, names(fread(f)), "a")
+m = as.matrix(DT)
+fwrite(m, f, select = "a")
+test(2339.4, names(fread(f)), "a")
+# selection by position, checking the written contents and not just the header
+fwrite(DT, f, select = 2L)
+test(2339.5, fread(f), data.table(b=3:4))
+# columns are written in the order given by select
+fwrite(DT, f, select = c("b", "a"))
+test(2339.6, fread(f), data.table(b=3:4, a=1:2))
+# the input table itself must be left untouched by the selection
+test(2339.7, DT, data.table(a=1:2, b=3:4))
+unlink(f)
@@ -22,7 +22,8 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
   bom = FALSE,
   verbose = getOption("datatable.verbose", FALSE),
   encoding = "",
-  forceDecimal = FALSE)
+  forceDecimal = FALSE,
+  select = NULL)
 }
 \arguments{
   \item{x}{Any \code{list} of same length vectors; e.g. \code{data.frame} and \code{data.table}. If \code{matrix}, it gets internally coerced to \code{data.table} preserving col names but not row names}
@@ -64,6 +65,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
   \item{verbose}{Be chatty and report timings?}
   \item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writing raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. }
   \item{forceDecimal}{ Should decimal points be forced for whole numbers in numeric columns? When \code{FALSE}, the default, whole numbers like \code{c(1.0, 2.0, 3.0)} will be written as \samp{1, 2, 3} i.e., dropping \code{dec}. }
+  \item{select}{Vector of column names or column numbers specifying which columns to write, in the order given. When \code{NULL} (the default), all columns are written. Using \code{select} avoids creating a temporary subset of \code{x} before writing, which saves memory.}
 }
 \details{
 \code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://h2o.ai/blog/2016/fast-csv-writing-for-r/}. Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector.

@@ -270,14 +270,23 @@ if (requireNamespace("bit64", quietly = TRUE)) {
 
 ### 2.4 Column Order and Subset Control
 
-To control the order and subset of columns written to file, subset the data.table before calling `fwrite()`. The `col.names` argument in `fwrite()` is a logical (TRUE/FALSE) that controls whether the header row is written, not which columns are written.
+To control the order and subset of columns written to file, you can use `[.data.table` to make a new table before calling `fwrite()`, but it is more efficient to use the `select` argument, which avoids making a copy.
 
 ```{r}
 dt = data.table(A = 1:3, B = 4:6, C = 7:9)
-
 # Write only columns C and A, in that order
 fwrite(dt[, .(C, A)], "out.csv")
 cat(readLines("out.csv"), sep = "\n")
+# Same columns and order via select, without subsetting dt first
+fwrite(dt, "out.csv", select=c("C","A"))
+cat(readLines("out.csv"), sep = "\n")
+file.remove("out.csv")
+```
+
+The `col.names` argument in `fwrite()` is a logical (TRUE/FALSE) that controls whether the header row is written, not which columns are written.
+
+```{r}
+fwrite(dt, "out.csv", col.names=FALSE)
+cat(readLines("out.csv"), sep = "\n")
 file.remove("out.csv")
 ```
 
@@ -292,4 +301,4 @@ For users interested in detailed, up-to-date performance comparisons, we recomme
 
 These benchmarks consistently show that `fread` and `fwrite` are highly competitive and often state-of-the-art for performance in the R ecosystem.
 
-***
+***