Skip to content

Commit 9bf1d2e

Browse files
committed
error for formula interface with sparse data
1 parent b7a5994 commit 9bf1d2e

File tree

4 files changed

+73
-9
lines changed

4 files changed

+73
-9
lines changed

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
* `fit_xy()` can now take dgCMatrix input for the `x` argument (#1121).
44

5-
* `fit()` and `fit_xy()` can now take sparse tibbles as data values (#1165).
5+
* `fit_xy()` can now take sparse tibbles as data values (#1165).
66

77
* `predict()` can now take dgCMatrix and sparse tibble input for the `new_data` argument, and errors informatively when the model doesn't support it (#1167).
88

R/convert_data.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@
4848
)
4949
}
5050

51+
if (is_sparse_tibble(data)) {
52+
cli::cli_abort(
53+
"Sparse data cannot be used with formula interface. Please use
54+
{.fn fit_xy} instead."
55+
)
56+
}
57+
5158
if (remove_intercept) {
5259
data <- data[, colnames(data) != "(Intercept)", drop = FALSE]
5360
}

tests/testthat/_snaps/sparsevctrs.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# sparse tibble can be passed to `fit()
22

3+
Code
4+
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
5+
Condition
6+
Error in `.convert_form_to_xy_fit()`:
7+
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.
8+
9+
---
10+
311
Code
412
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
513
Condition
@@ -8,6 +16,14 @@
816

917
# sparse matrix can be passed to `fit()
1018

19+
Code
20+
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
21+
Condition
22+
Error in `.convert_form_to_xy_fit()`:
23+
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.
24+
25+
---
26+
1127
Code
1228
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
1329
Condition
@@ -46,6 +62,14 @@
4662
Error in `predict()`:
4763
! `x` is a sparse matrix, but `linear_reg()` with engine "lm" doesn't accept that.
4864

65+
# sparse data work with xgboost engine
66+
67+
Code
68+
tree_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
69+
Condition
70+
Error in `.convert_form_to_xy_fit()`:
71+
! Sparse data cannot be used with formula interface. Please use `fit_xy()` instead.
72+
4973
# to_sparse_data_frame() is used correctly
5074

5175
Code

tests/testthat/test-sparsevctrs.R

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,47 @@
11
test_that("sparse tibble can be passed to `fit()", {
22
skip_if_not_installed("xgboost")
3+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
34

45
hotel_data <- sparse_hotel_rates()
56
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
67

78
spec <- boost_tree() %>%
89
set_mode("regression") %>%
910
set_engine("xgboost")
10-
11-
expect_no_error(
11+
12+
expect_snapshot(
13+
error = TRUE,
1214
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
1315
)
1416

1517
spec <- linear_reg() %>%
1618
set_mode("regression") %>%
1719
set_engine("lm")
1820

21+
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
22+
1923
expect_snapshot(
2024
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data[1:100, ])
2125
)
2226
})
2327

2428
test_that("sparse matrix can be passed to `fit()", {
2529
skip_if_not_installed("xgboost")
26-
30+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
31+
2732
hotel_data <- sparse_hotel_rates()
28-
33+
2934
spec <- boost_tree() %>%
3035
set_mode("regression") %>%
3136
set_engine("xgboost")
3237

33-
expect_no_error(
38+
expect_snapshot(
39+
error = TRUE,
3440
lm_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
3541
)
3642

43+
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
44+
3745
spec <- linear_reg() %>%
3846
set_mode("regression") %>%
3947
set_engine("lm")
@@ -45,9 +53,14 @@ test_that("sparse matrix can be passed to `fit()", {
4553

4654
test_that("sparse tibble can be passed to `fit_xy()", {
4755
skip_if_not_installed("xgboost")
48-
56+
4957
hotel_data <- sparse_hotel_rates()
5058
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
59+
60+
# materialize outcome
61+
hotel_data$avg_price_per_room <- hotel_data$avg_price_per_room[]
62+
63+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
5164

5265
spec <- boost_tree() %>%
5366
set_mode("regression") %>%
@@ -57,6 +70,8 @@ test_that("sparse tibble can be passed to `fit_xy()", {
5770
lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
5871
)
5972

73+
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
74+
6075
spec <- linear_reg() %>%
6176
set_mode("regression") %>%
6277
set_engine("lm")
@@ -68,6 +83,7 @@ test_that("sparse tibble can be passed to `fit_xy()", {
6883

6984
test_that("sparse matrices can be passed to `fit_xy()", {
7085
skip_if_not_installed("xgboost")
86+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
7187

7288
hotel_data <- sparse_hotel_rates()
7389

@@ -94,6 +110,11 @@ test_that("sparse tibble can be passed to `predict()", {
94110

95111
hotel_data <- sparse_hotel_rates()
96112
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
113+
114+
# materialize outcome
115+
hotel_data$avg_price_per_room <- hotel_data$avg_price_per_room[]
116+
117+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
97118

98119
spec <- rand_forest(trees = 10) %>%
99120
set_mode("regression") %>%
@@ -105,6 +126,8 @@ test_that("sparse tibble can be passed to `predict()", {
105126
predict(tree_fit, hotel_data)
106127
)
107128

129+
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
130+
108131
spec <- linear_reg() %>%
109132
set_mode("regression") %>%
110133
set_engine("lm")
@@ -122,6 +145,7 @@ test_that("sparse tibble can be passed to `predict()", {
122145

123146
test_that("sparse matrices can be passed to `predict()", {
124147
skip_if_not_installed("ranger")
148+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
125149

126150
hotel_data <- sparse_hotel_rates()
127151

@@ -151,6 +175,7 @@ test_that("sparse matrices can be passed to `predict()", {
151175

152176
test_that("sparse data work with xgboost engine", {
153177
skip_if_not_installed("xgboost")
178+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
154179

155180
spec <- boost_tree() %>%
156181
set_mode("regression") %>%
@@ -161,22 +186,28 @@ test_that("sparse data work with xgboost engine", {
161186
expect_no_error(
162187
tree_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
163188
)
164-
189+
165190
expect_no_error(
166191
predict(tree_fit, hotel_data)
167192
)
168193

169194
hotel_data <- sparsevctrs::coerce_to_sparse_tibble(hotel_data)
170195

171196

172-
expect_no_error(
197+
expect_snapshot(
198+
error = TRUE,
173199
tree_fit <- fit(spec, avg_price_per_room ~ ., data = hotel_data)
174200
)
175201

176202
expect_no_error(
177203
predict(tree_fit, hotel_data)
178204
)
179205

206+
# materialize outcome
207+
withr::local_options("sparsevctrs.verbose_materialize" = NULL)
208+
hotel_data$avg_price_per_room <- hotel_data$avg_price_per_room[]
209+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
210+
180211
expect_no_error(
181212
tree_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1])
182213
)
@@ -188,6 +219,7 @@ test_that("sparse data work with xgboost engine", {
188219

189220
test_that("to_sparse_data_frame() is used correctly", {
190221
skip_if_not_installed("xgboost")
222+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
191223

192224
local_mocked_bindings(
193225
to_sparse_data_frame = function(x, object) {
@@ -228,6 +260,7 @@ test_that("to_sparse_data_frame() is used correctly", {
228260

229261
test_that("maybe_sparse_matrix() is used correctly", {
230262
skip_if_not_installed("xgboost")
263+
withr::local_options("sparsevctrs.verbose_materialize" = 3)
231264

232265
local_mocked_bindings(
233266
maybe_sparse_matrix = function(x) {

0 commit comments

Comments
 (0)