Skip to content

Commit bf70018

Browse files
committed
Fix date operations for R 4.3; speed up summarization by ~60%
Extracting the date directly from date-times with date(), rather than converting via as_date(), saves a LOT of time. Model building actually seems quite fast compared to the other summaries, so it doesn't seem necessary to limit the time period just yet.
1 parent cd4ea6c commit bf70018

File tree

6 files changed

+40
-8
lines changed

6 files changed

+40
-8
lines changed

R/stocks.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ get_price_data <- function(data) {
105105
prices_5d <- last_n_days(data, 5)[, list(time, ticker, curr)]
106106
prices_1m <- last_n_days(data, 30)[, list(time, ticker, curr)]
107107
# Aggregate by day
108-
prices_all <- data[, list(curr = max(curr)), by = list(time = as_date(time), ticker)]
108+
prices_all <- data[, list(curr = max(curr)), by = list(time = date(time), ticker)]
109109

110110
peaks <- peaks_by_day(data)
111111

@@ -163,7 +163,7 @@ last_n_days <- function(data, n, full_days = FALSE) {
163163
time_day <- 3600*24
164164
start_time <- max(data$time) - n * time_day
165165
if (full_days) {
166-
start_time <- as_date(start_time)
166+
start_time <- floor_date(start_time)
167167
}
168168
data[time >= start_time]
169169
}

R/summary.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ summarize_all <- function(data) {
7575
# Get price highs by day
7676
high_by_day <- function(data, fill = TRUE) {
7777
highs <- data[data[, list(.I = .I[which.max(curr)]),
78-
by = list(ticker, date = as_date(time))]$.I]
78+
by = list(ticker, date = date(time))]$.I]
7979

8080
if (fill) {
8181
# Fill in missing days
82-
highs[, date := as_date(time)]
82+
highs[, date := date(time)]
8383
all_dates <- highs[, list(date = seq(min(date), max(date), by = "days")), by = ticker]
8484
highs <- highs[all_dates, on = list(date, ticker)]
8585
highs[is.na(curr), time := as_datetime(format(date))]
@@ -148,7 +148,7 @@ summarize_volume_price <- function(data) {
148148
# Summarize market volume (shares bought) by day
149149
summarize_volume_day <- function(data) {
150150
summ <- data[, list(volume = total_volume(volume)),
151-
by = list(date = as_date(time), ticker)]
151+
by = list(date = date(time), ticker)]
152152
summ <- summ[, list(volume = sum(volume)), by = date]
153153
summ
154154
}

R/utils.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,20 @@ as_datetime <- function(x, tz = TZ_NST) {
55
}
66

77
as_date <- function(x, tz = TZ_NST) {
8+
# Note: as.Date ignores tz for character strings
89
as.Date(x, tz = tz)
910
}
1011

12+
# Get the date of an existing date-time object. Use this over as_date() for
13+
# date-time objects, as date() will be much faster.
14+
date <- function(x) {
15+
lubridate::date(x)
16+
}
17+
18+
floor_date <- function(x, unit = "day") {
19+
lubridate::floor_date(x, unit = unit)
20+
}
21+
1122
to_iso_string <- function(time, tz = "UTC") {
1223
strftime(time, tz = tz, format = "%Y-%m-%dT%H:%M:%SZ")
1324
}

tests/testthat/test-predict.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
test_that("predict_prices", {
22
data <- read_stock_data(stock_data_example())
3-
predicted <- predict_prices(data[time <= as_date("2018-07-15")])
3+
predicted <- predict_prices(data[time <= as_datetime("2018-07-15")])
44

55
p <- predicted[curr %in% c(6, 10, 15, 30, 60, 90), p]
66
expected_p <- c(8.03, 7.18, 4.46, 0.348, 0.176, 0.052)

tests/testthat/test-stocks.R

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ test_that("get_price_data", {
211211
expect_equal(prices[["1d"]], data[1:2, ])
212212
expect_equal(prices[["5d"]], data[1:3, ])
213213
expect_equal(prices[["1m"]], data[1:4, ])
214-
expect_equal(prices[["all"]], data[, list(time = as_date(time), ticker, curr)])
214+
expect_equal(prices[["all"]], data[, list(time = date(time), ticker, curr)])
215215
expect_equal(prices$peaks, peaks_by_day(data))
216216
})
217217

@@ -316,6 +316,14 @@ test_that("last_n_days", {
316316
))
317317
))
318318

319+
expect_equal(last_n_days(data, 3), data.table(
320+
time = as_datetime(c(
321+
"2018-03-06 8:24:58",
322+
"2018-03-07",
323+
"2018-03-07 08:24:59"
324+
))
325+
))
326+
319327
expect_equal(last_n_days(data, 3, TRUE), data.table(
320328
time = as_datetime(c(
321329
"2018-03-04",

tests/testthat/test-utils.R

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,23 @@ test_that("as_datetime converts time strings", {
66

77
test_that("as_date converts date strings", {
88
date <- as_date("2018-07-03")
9-
expected <- as.Date("2018-07-03", tz = TZ_NST)
9+
expected <- as.Date("2018-07-03")
1010
expect_equal(date, expected)
1111
})
1212

13+
test_that("date gets the date of date-times", {
14+
time <- as_datetime("2018-07-01T17:30:00Z")
15+
expect_equal(date(time), as.Date("2018-07-01"))
16+
date <- as_date("2018-07-03")
17+
expect_equal(date(date), as.Date("2018-07-03"))
18+
})
19+
20+
test_that("floor_date rounds dates down", {
21+
time <- floor_date(as_datetime("2018-07-01T17:30:00Z"))
22+
expected <- as_datetime("2018-07-01")
23+
expect_equal(time, expected)
24+
})
25+
1326
test_that("to_iso_string converts datetimes", {
1427
time <- as_datetime("2018-07-01T17:30:00Z")
1528
str <- to_iso_string(time)

0 commit comments

Comments
 (0)