Benchmarking
Benchmarking.Rmd
Timings hydrorecipes vs recipes
Timings for the hydrorecipes package are prefixed with an “h”. The first few comparisons include the R6 interface in hydrorecipes to check whether there is any loss of speed compared to the standard API. Most users are likely to use the standard API, so the remaining benchmarks present only that. Typical speed improvements are between 2x and 10x, and memory consumption is typically half that of the recipes package.
creating a recipe
# Settings shared by the benchmark sections in this file.
# `relative` is forwarded to bench::mark(); FALSE reports absolute values.
relative <- FALSE
# Row counts that bench::press() iterates over.
n <- c(1e2, 1e4, 5e6)
formula <- as.formula(y ~ x)

# Benchmark: construct a recipe only (no steps, no prep/bake).
#   hrec1 - hydrorecipes R6 constructor (Recipe$new)
#   hrec2 - unqualified recipe(); presumably the hydrorecipes standard API
#           (depends on which package is attached - confirm)
#   rec   - recipes::recipe()
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat),
      hrec2 = recipe(formula = formula, data = dat),
      rec = recipes::recipe(formula = formula, data = dat),
      # The returned objects are not compared for equality.
      check = FALSE,
      # Honour the shared `relative` switch like the other sections do.
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 643.63µs 665.39µs 1477. 1.87MB 6.19
#> 2 hrec2 100 642.95µs 664.15µs 1488. 3.66KB 6.95
#> 3 rec 100 2.91ms 2.99ms 327. 84.14MB 4.09
#> 4 hrec1 10000 640.57µs 661.19µs 1497. 544B 4.07
#> 5 hrec2 10000 643.59µs 661.88µs 1499. 544B 6.16
#> 6 rec 10000 2.91ms 2.97ms 336. 42.63KB 6.25
#> 7 hrec1 5000000 640.35µs 662.02µs 1497. 544B 6.48
#> 8 hrec2 5000000 640.65µs 657.96µs 1505. 544B 4.07
#> 9 rec 5000000 2.91ms 2.96ms 336. 19.08MB 6.23
add a step
# Benchmark: construct a recipe and attach one centering step
# (nothing is prepped or baked here).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = seq_len(rows))
    bench::mark(
      # R6 interface: constructor chained with add_step().
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
        add_step(hydrorecipes:::StepCenter$new(x)),
      # Pipe-based interface using the unqualified recipe()/step_center().
      hrec2 = recipe(formula = formula, data = dat) |>
        step_center(x),
      # recipes package equivalent.
      rec = {
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_center(x)
      },
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 868.55µs 892.43µs 1104. 531.23KB 6.18
#> 2 hrec2 100 891.83µs 925.42µs 1069. 6.57KB 6.30
#> 3 rec 100 3.03ms 3.12ms 318. 102.48KB 4.10
#> 4 hrec1 10000 868.75µs 901.76µs 1103. 1.06KB 6.18
#> 5 hrec2 10000 888.89µs 919.56µs 1082. 1.06KB 6.21
#> 6 rec 10000 3.03ms 3.12ms 320. 43.16KB 4.10
#> 7 hrec1 5000000 870.86µs 891.51µs 1114. 1.06KB 4.08
#> 8 hrec2 5000000 886.83µs 912.45µs 1089. 1.06KB 6.19
#> 9 rec 5000000 3.04ms 3.09ms 322. 19.08MB 6.35
step_center prep
# Benchmark: prep() only, for a recipe with a single centering step.
# The recipes are built once per row count, outside the timed expressions,
# so only the prep() calls are measured.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    hrec1 <- hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 <- recipe(formula = formula, data = dat) |>
      step_center(x)
    rec <- recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)
    # NOTE(review): hrec1 is an R6 object that is reused across iterations;
    # if prep() mutates it in place, later iterations time a re-prep of an
    # already prepped recipe - confirm.
    bench::mark(
      hrec1$prep(),
      hrec2 |> prep(),
      rec |> recipes::prep(),
      # The returned objects are not compared for equality.
      check = FALSE,
      min_iterations = 10,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1$prep() 100 181.91µs 188.72µs 5238. 80.73KB 6.18
#> 2 prep(hrec2) 100 182.44µs 189.4µs 5173. 2.69KB 6.17
#> 3 recipes::prep(rec) 100 13.72ms 13.91ms 71.6 4.06MB 6.71
#> 4 hrec1$prep() 10000 190.77µs 198.63µs 4932. 0B 6.17
#> 5 prep(hrec2) 10000 193.3µs 200.07µs 4943. 0B 6.18
#> 6 recipes::prep(rec) 10000 13.82ms 14.04ms 70.7 224.78KB 6.63
#> 7 hrec1$prep() 5000000 4.99ms 5.08ms 196. 0B 0
#> 8 prep(hrec2) 5000000 5.05ms 5.08ms 197. 0B 0
#> 9 recipes::prep(rec) 5000000 101.8ms 106.78ms 6.47 95.4MB 10.4
step_center prep and bake
# Benchmark: prep() followed by bake() for a recipe with a single centering
# step. The un-prepped recipes are built once per row count, outside the
# timed expressions.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    hrec1 <- hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 <- recipe(formula = formula, data = dat) |>
      step_center(x)
    rec <- recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)
    # NOTE(review): hrec1 is an R6 object that is reused across iterations;
    # if prep()/bake() mutate it in place, later iterations do not start from
    # a fresh recipe - confirm.
    bench::mark(
      hrec1$prep()$bake(),
      hrec2 |> prep() |> bake(),
      # new_data = NULL asks recipes to return the processed training data.
      rec |> recipes::prep() |> recipes::bake(new_data = NULL),
      # The returned objects are not compared for equality.
      check = FALSE,
      min_iterations = 10,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 hrec1$prep()$bake() 1e2 260.9µs 270.3µs 3605. 382.14KB 6.17
#> 2 bake(prep(hrec2)) 1e2 260.7µs 271.7µs 3640. 3.56KB 6.17
#> 3 recipes::bake(recipes::pre… 1e2 14.4ms 14.7ms 67.6 161.56KB 4.22
#> 4 hrec1$prep()$bake() 1e4 280.3µs 291.8µs 3389. 78.17KB 4.13
#> 5 bake(prep(hrec2)) 1e4 280.6µs 291.4µs 3353. 78.17KB 6.18
#> 6 recipes::bake(recipes::pre… 1e4 14.6ms 14.8ms 67.0 229.11KB 6.70
#> 7 hrec1$prep()$bake() 5e6 12.4ms 12.4ms 80.2 38.15MB 80.2
#> 8 bake(prep(hrec2)) 5e6 12.2ms 12.4ms 78.4 38.15MB 39.2
#> 9 recipes::bake(recipes::pre… 5e6 100.7ms 101.7ms 9.84 95.4MB 44.3
step_center
# Benchmark: full pipeline for a centering step, returning the centered `x`.
# The hydrorecipes side finishes with plate(); the recipes side with
# prep() + bake().
formula <- y ~ x + z
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = (
        recipe(formula = formula, data = dat) |>
          step_center(x) |>
          plate()
      )[["x"]],
      rec = (
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_center(x) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[["x"]],
      # check = TRUE: both expressions must yield equal results.
      relative = relative,
      min_iterations = 10,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 1.26ms 1.31ms 759. 97.5KB 6.26
#> 2 rec 100 17.77ms 18.08ms 55.0 40.2KB 4.23
#> 3 hrec 10000 1.28ms 1.32ms 751. 80.9KB 6.22
#> 4 rec 10000 18ms 18.23ms 54.5 272.3KB 4.36
#> 5 hrec 5000000 10.08ms 13.68ms 74.4 38.1MB 47.8
#> 6 rec 5000000 110.8ms 110.8ms 9.03 114.5MB 81.2
step_scale
# Benchmark: scaling step, returning the scaled `x` column.
# `dat` also carries a `z` column that the formula does not reference.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = (
        recipe(formula = formula, data = dat) |>
          step_scale(x, fun = fsd, n_sd = 2L) |>
          plate("tbl")
      )[["x"]],
      rec = (
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_scale(x, factor = 2L) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[["x"]],
      # check = TRUE: both expressions must yield equal results.
      min_iterations = 5,
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 1.27ms 1.31ms 756. 594.8KB 6.22
#> 2 rec 100 17.65ms 17.99ms 54.2 133.2KB 4.34
#> 3 hrec 10000 1.31ms 1.35ms 728. 79.2KB 6.20
#> 4 rec 10000 17.83ms 18.08ms 54.8 233.2KB 4.39
#> 5 hrec 5000000 20.82ms 24.55ms 41.6 38.1MB 17.8
#> 6 rec 5000000 131.12ms 131.27ms 7.62 95.4MB 11.4
step_intercept
# Benchmark: intercept step, returning the generated "intercept" column.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
    bench::mark(
      hrec = (
        recipe(formula = formula, data = dat) |>
          step_intercept() |>
          plate("tbl")
      )[["intercept"]],
      rec = (
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_intercept() |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[["intercept"]],
      # check = TRUE: both expressions must yield equal results.
      min_iterations = 2,
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 1.24ms 1.28ms 774. 527.7KB 6.23
#> 2 rec 100 16.64ms 16.9ms 58.9 116.3KB 4.36
#> 3 hrec 10000 1.24ms 1.29ms 764. 79.2KB 6.21
#> 4 rec 10000 16.79ms 17.04ms 58.3 112.1KB 6.73
#> 5 hrec 5000000 10.65ms 10.93ms 91.5 38.1MB 25.0
#> 6 rec 5000000 48.75ms 50.15ms 20.0 38.2MB 8.55
step_normalize
# Benchmark: normalisation of x, y and z.
#   hrec1 - single step_normalize()
#   hrec2 - step_center() followed by step_scale()
#   rec   - recipes::step_normalize()
# The hydrorecipes results are re-ordered to x, z, y before comparison.
formula <- y ~ x + z
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec1 = (
        recipe(formula = formula, data = dat) |>
          step_normalize(c(x, y, z)) |>
          plate("tbl")
      )[, c("x", "z", "y")],
      hrec2 = (
        recipe(formula = formula, data = dat) |>
          step_center(c(x, y, z)) |>
          step_scale(c(x, y, z)) |>
          plate("tbl")
      )[, c("x", "z", "y")],
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_normalize(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      # check = TRUE: all three expressions must yield equal results.
      check = TRUE,
      min_iterations = 2,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.34ms 1.38ms 719. 541.98KB 6.24
#> 2 hrec2 100 1.7ms 1.75ms 568. 6.56KB 6.26
#> 3 rec 100 18.23ms 18.61ms 52.9 88.22KB 4.24
#> 4 hrec1 10000 1.51ms 1.55ms 642. 235.58KB 6.23
#> 5 hrec2 10000 1.88ms 1.94ms 514. 470.62KB 4.11
#> 6 rec 10000 18.69ms 19ms 52.3 663.09KB 6.82
#> 7 hrec1 5000000 83.16ms 90.09ms 11.2 114.44MB 5.59
#> 8 hrec2 5000000 99.27ms 101.64ms 9.86 228.88MB 9.86
#> 9 rec 5000000 273.57ms 273.77ms 3.65 305.21MB 3.65
step_lag
# Benchmark: 30 lags of `x` (hydrorecipes step_lead_lag vs recipes step_lag).
# Both results are passed through unname() so that differing column names do
# not fail the equality check.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_lead_lag(x, lag = 1:30) |>
          plate("tbl")
      ),
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_lag(x, lag = 1:30) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      min_iterations = 10,
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 2.83ms 2.9ms 340. 799.77KB 6.29
#> 2 rec 100 22.44ms 22.88ms 43.7 281.48KB 4.37
#> 3 hrec1 10000 2.98ms 3.06ms 317. 2.3MB 4.12
#> 4 rec 10000 23.64ms 23.93ms 41.6 5.9MB 6.93
#> 5 hrec1 5000000 179.61ms 196.59ms 3.95 1.12GB 3.95
#> 6 rec 5000000 660.25ms 722.94ms 1.29 2.83GB 2.19
step_distributed_lag
# Benchmark: distributed lag step (hydrorecipes only; no recipes counterpart
# is timed here). Uses its own row counts instead of the global `n`.
formula <- y ~ x
results <- bench::press(
  rows = c(5e5, 5e6, 1e7),
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_distributed_lag(x, knots = log_lags_arma(5, 86401)) |>
        plate(),
      min_iterations = 2,
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 500000
#> 2 5000000
#> 3 10000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 500000 65.8ms 69.7ms 14.4 19.1MB 0
#> 2 hrec 5000000 450.2ms 450.2ms 2.22 155.9MB 2.22
#> 3 hrec 10000000 904.9ms 904.9ms 1.11 308.5MB 1.11
step_harmonic
# Benchmark: harmonic (sin/cos) terms of `x` at three frequencies.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_value = 0.0
        ) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_val = 0.0,
          keep_original_cols = TRUE
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      # sin and cos terms order is different, so results are not compared
      check = FALSE,
      min_iterations = 10,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 1.41ms 1.46ms 680. 565.88KB 6.22
#> 2 rec 100 18.57ms 18.87ms 52.7 176.72KB 4.39
#> 3 hrec 10000 2.67ms 2.7ms 369. 548.27KB 4.10
#> 4 rec 10000 20.28ms 20.5ms 48.4 1.91MB 4.40
#> 5 hrec 5000000 596.49ms 618.41ms 1.63 267.03MB 0.181
#> 6 rec 5000000 775.4ms 782.13ms 1.25 934.64MB 0.535
step_pca
# Benchmark: PCA on 12 predictors.
#   hrec1 / rec1 - 10 components, centred and scaled
#   hrec2 / rec2 -  5 components, centred and scaled
#   hrec3 / rec3 - 10 components; hrec3 sets center/scale to FALSE,
#                  rec3 passes no prcomp options
#   hrec4 / rec4 -  5 components; same settings as hrec3 / rec3
set.seed(1)
formula <- x ~ a + b + c + d + e + f + g + h + i + j + k + l
results <- bench::press(
  rows = n,
  {
    # Response `x` followed by predictors a..l; one rnorm(rows) draw per
    # column, generated in column order.
    col_names <- c("x", letters[1:12])
    dat <- as.data.frame(
      lapply(setNames(nm = col_names), \(col) rnorm(rows))
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_pca(all_predictor(), n_comp = 10L) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_pca(all_predictor(), n_comp = 5L) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_pca(
          all_predictor(),
          n_comp = 10L,
          center = FALSE,
          scale = FALSE
        ) |>
        plate(),
      hrec4 = recipe(formula = formula, data = dat) |>
        step_pca(
          all_predictor(),
          n_comp = 5L,
          center = FALSE,
          scale = FALSE
        ) |>
        plate(),
      rec1 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 10L,
          options = list(center = TRUE, scale. = TRUE)
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec2 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 5L,
          options = list(center = TRUE, scale. = TRUE)
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec3 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 10L
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec4 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 5L
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      min_iterations = 2,
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
print(results, n = 100)
#> # A tibble: 24 × 14
#> expression rows min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#> 1 hrec1 100 1.63ms 1.68ms 594. 706.11KB 6.25 285 3
#> 2 hrec2 100 1.63ms 1.68ms 594. 34.89KB 4.11 289 2
#> 3 hrec3 100 1.56ms 1.61ms 621. 34.02KB 6.24 299 3
#> 4 hrec4 100 1.56ms 1.6ms 621. 24.95KB 6.23 299 3
#> 5 rec1 100 21.69ms 22.21ms 44.7 661.98KB 4.47 20 2
#> 6 rec2 100 21.14ms 21.49ms 46.5 277.84KB 6.97 20 3
#> 7 rec3 100 21.28ms 21.59ms 46.2 154.4KB 6.93 20 3
#> 8 rec4 100 20.77ms 21.08ms 47.3 136.89KB 4.30 22 2
#> 9 hrec1 10000 4.24ms 4.28ms 224. 3.37MB 2.03 110 1
#> 10 hrec2 10000 4.15ms 4.2ms 238. 2.6MB 4.17 114 2
#> 11 hrec3 10000 3.17ms 3.22ms 299. 2.45MB 2.03 147 1
#> 12 hrec4 10000 3.1ms 3.15ms 317. 1.69MB 2.03 156 1
#> 13 rec1 10000 41.54ms 41.82ms 23.8 21.17MB 4.76 10 2
#> 14 rec2 10000 40.52ms 41.01ms 24.3 20.02MB 2.03 12 1
#> 15 rec3 10000 34.57ms 34.99ms 28.4 7.89MB 4.73 12 2
#> 16 rec4 10000 33.78ms 34.03ms 29.3 6.74MB 4.51 13 2
#> 17 hrec1 5000000 782.28ms 807.83ms 1.24 1.64GB 0.619 2 1
#> 18 hrec2 5000000 730.48ms 767.09ms 1.30 1.27GB 0.652 2 1
#> 19 hrec3 5000000 391.58ms 409.78ms 2.44 1.19GB 2.44 2 2
#> 20 hrec4 5000000 318.48ms 361.1ms 2.77 839.24MB 0 2 0
#> 21 rec1 5000000 4.14s 4.2s 0.238 10.3GB 1.43 2 12
#> 22 rec2 5000000 4.01s 4.33s 0.231 9.74GB 2.08 2 18
#> 23 rec3 5000000 2.01s 2.07s 0.483 3.82GB 1.21 2 5
#> 24 rec4 5000000 1.69s 1.73s 0.577 3.26GB 1.44 2 5
#> # ℹ 5 more variables: total_time <bch:tm>, result <list>, memory <list>,
#> # time <list>, gc <list>
step_dummy
# Benchmark: dummy (indicator) encoding of a 10-level factor `y`.
# Both results are passed through unname() so that differing column names do
# not fail the equality check.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      # qF() turns the sampled integers into a factor.
      y = qF(sample(1:10, rows, replace = TRUE)),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_dummy(y) |>
          plate("tbl")
      ),
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_dummy(y, keep_original_cols = TRUE) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      # Use the shared `relative` switch instead of a hard-coded FALSE so this
      # section follows the same setting as the other benchmarks.
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 2.8ms 2.88ms 346. 559.23KB 6.36
#> 2 rec 100 21.2ms 21.6ms 46.3 487.89KB 4.41
#> 3 hrec 10000 2.9ms 2.97ms 333. 353.05KB 4.11
#> 4 rec 10000 38ms 38.45ms 25.9 6.09MB 4.70
#> 5 hrec 5000000 45.8ms 48.57ms 19.2 235.66MB 3.83
#> 6 rec 5000000 11.5s 11.48s 0.0871 2.98GB 0.261
step_find_interval
There is no direct equivalent of step_find_interval in recipes, so it is compared against recipes::step_cut.
# Benchmark: interval binning of `x` at -0.1, 0 and 0.1
# (hydrorecipes step_find_interval vs recipes step_cut).
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_find_interval(x, vec = c(-0.1, 0, 0.1)) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_cut(x, breaks = c(-0.1, 0, 0.1)) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      # The two steps are different functions; results are not compared.
      min_iterations = 1,
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 1.39ms 1.43ms 692. 562.7KB 6.99
#> 2 rec 100 18.38ms 18.77ms 52.5 174.2KB 0
#> 3 hrec 10000 1.53ms 1.58ms 632. 157.5KB 0
#> 4 rec 10000 18.68ms 19.36ms 51.1 585.1KB 2.13
#> 5 hrec 5000000 71.02ms 71.37ms 13.9 76.3MB 0
#> 6 rec 5000000 377.75ms 383.97ms 2.60 267.1MB 0
step_varying
# Benchmark: removal of constant columns (hydrorecipes step_varying vs
# recipes step_zv). `x` is constant (all ones); y and z vary.
formula <- y ~ x + z
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_varying(c(x, y, z)) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_zv(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      # check = TRUE: both expressions must yield equal results.
      min_iterations = 1,
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 1.26ms 1.29ms 768. 583.33KB 2.08
#> 2 rec 100 17.88ms 18.4ms 53.9 93.13KB 2.07
#> 3 hrec 10000 1.25ms 1.29ms 760. 40.17KB 2.06
#> 4 rec 10000 18.51ms 18.61ms 53.4 1.33MB 0
#> 5 hrec 5000000 5.42ms 5.5ms 179. 19.07MB 0
#> 6 rec 5000000 628.34ms 628.34ms 1.59 649.8MB 0
step_kernel_filter
# Benchmark: 5001-point moving average of `z`
# (hydrorecipes step_kernel_filter with a uniform kernel vs recipes
# step_window with statistic = "mean"). Only row 10000 of each result is
# compared. Uses its own row counts instead of the global `n`.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = c(2e4, 2e5),
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = 1:rows,
      z = cumsum(rnorm(rows))
    )
    bench::mark(
      hrec = unname(
        (
          recipe(formula = formula, data = dat) |>
            step_kernel_filter(
              z,
              kernel = list(rep(1, 5001) / 5001),
              align = "center"
            ) |>
            plate("tbl")
        )[10000, "kernel_filter_z"]
      ),
      # Named like the other sections so the results table shows `rec`
      # instead of a truncated deparsed expression.
      rec = unname(
        (
          recipes::recipe(formula = formula, data = dat) |>
            recipes::step_window(z, size = 5001, statistic = "mean") |>
            recipes::prep() |>
            recipes::bake(new_data = NULL)
        )[10000, "z"]
      ),
      check = TRUE,
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#> rows
#> 1 20000
#> 2 200000
results
#> # A tibble: 4 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 2e4 3.59ms 3.65ms 273. 750KB 2.07
#> 2 { rec = recipes::bake(re… 2e4 300.12ms 300.44ms 3.33 508.16KB 0
#> 3 hrec 2e5 5.82ms 5.92ms 167. 1.56MB 0
#> 4 { rec = recipes::bake(re… 2e5 3.64s 3.64s 0.275 3.11MB 0
multiple steps
# Benchmark: a three-step recipe (10 lags, 5 harmonic frequencies, centering)
# applied to `x`.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = seq_len(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_lead_lag(x, lag = 1:10) |>
        step_harmonic(
          x,
          frequency = c(1, 2, 3, 4, 5),
          cycle_size = 0.1,
          starting_value = 0
        ) |>
        step_center(x) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_lag(x, lag = 1:10, keep_original_cols = TRUE) |>
        recipes::step_harmonic(
          x,
          frequency = c(1, 2, 3, 4, 5),
          cycle_size = 0.1,
          starting_val = 0,
          keep_original_cols = TRUE
        ) |>
        recipes::step_center(x) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      # Results are not compared for equality.
      min_iterations = 1,
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 2.24ms 2.29ms 436. 26.89KB 2.08
#> 2 rec 100 33.93ms 34.48ms 28.8 191.67KB 2.21
#> 3 hrec 10000 3.33ms 3.39ms 295. 1.76MB 2.05
#> 4 rec 10000 36.27ms 36.51ms 27.2 5.13MB 0
#> 5 hrec 5000000 2.03s 2.03s 0.493 877.39MB 0
#> 6 rec 5000000 2.78s 2.78s 0.360 2.44GB 1.08
step_spline_b
# Benchmark: B-spline basis of `x` with 13 degrees of freedom.
# Both results are passed through unname() so that differing column names do
# not fail the equality check.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = seq_len(rows))
    bench::mark(
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_spline_b(x, df = 13) |>
          plate("tbl")
      ),
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_spline_b(
            x,
            deg_free = 13,
            keep_original_cols = TRUE
          ) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      min_iterations = 10,
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 2.95ms 3.02ms 330. 583.65KB 0
#> 2 rec 100 21.59ms 21.91ms 45.4 365.84KB 2.16
#> 3 hrec 10000 3.81ms 3.9ms 255. 1.07MB 2.08
#> 4 rec 10000 23.9ms 24.26ms 40.6 4.55MB 2.14
#> 5 hrec 5000000 530.38ms 590.65ms 1.72 534.06MB 0.687
#> 6 rec 5000000 1.62s 1.66s 0.575 2.2GB 1.21
step_aquifer_grf & step_aquifer_theis
The Theis solution is a special case of the grf solution.
# Benchmark: step_aquifer_grf vs step_aquifer_theis on the same inputs
# (time = x, constant flow rate = y). check = TRUE requires equal results.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_aquifer_theis(time = x, flow_rate = y) |>
        plate("dt"),
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.42ms 1.5ms 667. 555.2KB 0
#> 2 hrec2 100 1.5ms 1.54ms 628. 530.4KB 3.57
#> 3 hrec1 10000 2.34ms 2.38ms 419. 160.2KB 0
#> 4 hrec2 10000 2.43ms 2.48ms 402. 83.1KB 2.07
#> 5 hrec1 5000000 604.03ms 604.03ms 1.66 76.3MB 0
#> 6 hrec2 5000000 596.65ms 596.65ms 1.68 38.2MB 0
step_aquifer_leaky
# Benchmark: step_aquifer_leaky (leakage = 1e8) vs step_aquifer_theis.
# check = TRUE requires the two unnamed results to be equal.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_leaky(
            time = x,
            flow_rate = y,
            leakage = 1e8
          ) |>
          plate("dt")
      ),
      hrec2 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_theis(time = x, flow_rate = y) |>
          plate("dt")
      ),
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.52ms 1.56ms 634. 615.1KB 2.07
#> 2 hrec2 100 1.6ms 1.64ms 606. 7.45KB 2.07
#> 3 hrec1 10000 3.27ms 3.3ms 298. 396.41KB 0
#> 4 hrec2 10000 2.54ms 2.58ms 387. 84.8KB 0
#> 5 hrec1 5000000 1.32s 1.32s 0.758 190.74MB 0
#> 6 hrec2 5000000 615.82ms 615.82ms 1.62 38.15MB 0
step_aquifer_patch
# Benchmark: step_aquifer_grf (hrec1) vs step_aquifer_patch (hrec3) at a
# single row count of 1e5. Results are not compared for equality.
formula <- y ~ x
results <- bench::press(
  rows = 1e5,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = (
        recipe(formula = formula, data = dat) |>
          step_aquifer_grf(time = x, flow_rate = y) |>
          plate("dt")
      ),
      hrec3 = (
        recipe(formula = formula, data = dat) |>
          step_aquifer_patch(
            time = x,
            flow_rate = 0.01,
            thickness = 1.0,
            radius = 100.0,
            radius_patch = 200.0,
            # Inner and outer zones are given identical properties.
            specific_storage_inner = 1e-6,
            specific_storage_outer = 1e-6,
            hydraulic_conductivity_inner = 1e-4,
            hydraulic_conductivity_outer = 1e-4,
            n_stehfest = 8L
          ) |>
          plate("dt")
      ),
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100000
results
#> # A tibble: 2 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100000 9.88ms 10.1ms 98.4 1.53MB 0
#> 2 hrec3 100000 953.04ms 953ms 1.05 1.31MB 0
step_vadose_weeks
# Benchmark: step_vadose_weeks (hydrorecipes only; nothing to compare with).
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    time_index <- as.numeric(1:rows)
    dat <- data.frame(x = time_index, y = time_index)
    bench::mark(
      hrec1 = (
        recipe(formula = formula, data = dat) |>
          step_vadose_weeks(
            time = x,
            air_diffusivity = 0.8,
            thickness = 5,
            precision = 1e-12
          ) |>
          plate("dt")
      ),
      min_iterations = 10,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.27ms 1.31ms 760. 545.5KB 2.07
#> 2 hrec1 10000 1.46ms 1.49ms 669. 160.1KB 0
#> 3 hrec1 5000000 171.34ms 171.85ms 5.80 76.3MB 0
step_transport_ogata_banks
# Benchmark: step_transport_ogata_banks on a grid of `rows` times by
# 10 distances, so `dat` has 10 * rows rows for each `rows` value.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    # Name the grid columns directly: x = time, y = distance.
    dat <- data.frame(
      expand.grid(
        x = as.numeric(1:rows),
        y = as.numeric(1:10)
      )
    )
    bench::mark(
      hrec1 = (
        recipe(formula = formula, data = dat) |>
          step_transport_ogata_banks(time = x, distance = y) |>
          plate("dt")
      ),
      min_iterations = 10,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.35ms 1.39ms 718. 559KB 2.07
#> 2 hrec1 10000 4.65ms 4.72ms 209. 785KB 2.09
#> 3 hrec1 5000000 1.47s 1.47s 0.680 381MB 1.02
step_transport_fractures_solute
# Benchmark: step_transport_fractures_solute on a fixed grid of
# 6 times x 11 fracture distances x 1 matrix distance (66 rows).
# One-sided formula: there is no outcome variable.
formula <- as.formula(~ time + z + x)
dat <- setDT(
  expand.grid(
    10^(3:8),
    seq(0.0, 10, 1),
    c(0.0)
  )
)
# Rename by reference; `names<-` on a data.table makes a shallow copy first.
setnames(dat, c("time", "z", "x"))
results <-
  bench::mark(
    hrec1 = recipe(formula = formula, data = dat) |>
      step_transport_fractures_solute(
        time = time,
        distance_fracture = z,
        distance_matrix = x
      ) |>
      plate("dt"),
    check = FALSE,
    min_iterations = 10
  )
results
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1.78ms 1.84ms 538. 580KB 0
step_transport_fractures_heat
# Benchmark: step_transport_fractures_heat on a fixed grid of
# 6 times x 101 fracture distances x 2 matrix distances (1212 rows).
# One-sided formula: there is no outcome variable.
formula <- as.formula(~ time + z + x)
dat <- setDT(
  expand.grid(
    10^(3:8),
    seq(0.0, 100, 1),
    c(0.0, 0.05)
  )
)
# Rename by reference; `names<-` on a data.table makes a shallow copy first.
setnames(dat, c("time", "z", "x"))
results <-
  bench::mark(
    hrec1 = recipe(formula = formula, data = dat) |>
      step_transport_fractures_heat(
        time = time,
        distance_fracture = z,
        distance_matrix = x
      ) |>
      plate("dt"),
    check = FALSE,
    min_iterations = 10
  )
results
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 10.3ms 10.3ms 96.5 587KB 0
step_pgram and step_welch
# Benchmark: spectral steps on x and y (hydrorecipes only).
#   hrec1 / hrec2 - step_fft_pgram; the two calls differ only in the fifth
#                   positional argument (FALSE vs TRUE)
#   hrec3         - step_fft_welch with subsets one tenth of the series length
# NOTE(review): the step_fft_pgram arguments are passed positionally; their
# meaning is not visible here - consider naming them.
formula <- y ~ x + z
results <- bench::press(
  rows = n,
  {
    # Columns q, r and s are created but not referenced by the formula.
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows),
      q = rnorm(rows),
      r = rnorm(rows),
      s = rnorm(rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 3, TRUE, TRUE, FALSE, 0.1) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 3, TRUE, TRUE, TRUE, 0.1) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_fft_welch(
          c(x, y),
          length_subset = nrow(dat) / 10,
          overlap = 0.60,
          window = window_nuttall(nrow(dat) / 10)
        ) |>
        plate(),
      min_iterations = 3,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.65ms 1.7ms 586. 566.41KB 2.76
#> 2 hrec2 100 1.68ms 1.72ms 574. 18.21KB 0
#> 3 hrec3 100 1.48ms 1.54ms 647. 546.3KB 2.56
#> 4 hrec1 10000 2.39ms 2.43ms 410. 1.38MB 0
#> 5 hrec2 10000 2.25ms 2.38ms 421. 1.07MB 0
#> 6 hrec3 10000 2.71ms 2.8ms 356. 260.91KB 2.58
#> 7 hrec1 5000000 471.55ms 487.89ms 2.05 686.65MB 1.02
#> 8 hrec2 5000000 414.88ms 414.88ms 2.41 534.06MB 4.82
#> 9 hrec3 5000000 443.85ms 454.35ms 2.22 125.89MB 0
step_transfer_welch and step_transfer_pgram
# Benchmark: transfer-function steps on x and y (hydrorecipes only).
#   hrec1 - step_fft_transfer_pgram (arguments passed positionally)
#   hrec2 - step_fft_transfer_welch with subsets one tenth of the series length
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_pgram(c(x, y), 3, TRUE, TRUE, 0.1) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_welch(
          c(x, y),
          length_subset = nrow(dat) / 10,
          overlap = 0.60,
          window = window_nuttall(nrow(dat) / 10)
        ) |>
        plate("dt"),
      min_iterations = 2,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.65ms 1.72ms 572. 548.3KB 0
#> 2 hrec2 100 1.54ms 1.58ms 631. 542.9KB 0
#> 3 hrec1 10000 3.22ms 3.35ms 291. 471.7KB 0
#> 4 hrec2 10000 2.88ms 2.94ms 340. 198.3KB 0
#> 5 hrec1 5000000 949.08ms 949.08ms 1.05 229.1MB 1.05
#> 6 hrec2 5000000 489.63ms 494.47ms 2.02 95.4MB 0
step_ols
# Benchmark: centre x, scale z, then fit y ~ x + z with step_ols
# (hydrorecipes only).
formula <- y ~ x + z
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_center(x) |>
        step_scale(z) |>
        step_ols(formula = formula) |>
        plate(),
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 100 2.68ms 2.77ms 356. 812.21KB 0
#> 2 hrec 10000 4.63ms 4.66ms 214. 1.34MB 0
#> 3 hrec 5000000 208.04ms 212.5ms 4.71 667.58MB 2.35
step_ols_gap_fill
# Benchmark: fill a gap in `x` by regression (step_ols_gap_fill).
set.seed(123)
# Series length for this section. Kept under its own name so that the global
# `n` (the vector of row counts used by the bench::press() calls) is not
# overwritten.
n_obs <- 100000
frm <- formula(x ~ y + z)
# Random walk; `y` keeps the clean series, `x` is perturbed below.
x <- cumsum(rnorm(n_obs))
dat <- data.table(x = x, y = x, z = as.numeric(1:n_obs))
# Offset of 20 on the first half of the series.
dat[, x := x + c(rep(20, n_obs / 2), rep(0, n_obs / 2))]
# Smooth sinusoidal trend.
dat[, x := x + 3.0 * sin(z / n_obs)]
# Copy of `x` before the gap is introduced (not used further in this section).
tmp <- copy(dat$x)
# Set value to NA. These values will be estimated.
dat[60000:70000, x := NA_real_]
# Drop the data.table class; the recipe receives a plain list of columns.
dat <- unclass(dat)
bench::mark(
  # Named so the results table shows `hrec` rather than a truncated
  # deparsed expression.
  hrec = {
    # Recipe describing the regressors used to fill the gap.
    h <- recipe(formula = frm, data = dat) |>
      step_find_interval(z, vec = c(0, n_obs / 2, n_obs)) |>
      step_intercept() |>
      step_spline_b(z, df = 4) |>
      step_drop_columns(z)
    recipe(formula = frm, data = dat) |>
      step_ols_gap_fill(c(x, y, z), recipe = h) |>
      plate()
  }
)
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : NULL
#> ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch> <bch:> <dbl> <bch:byt> <dbl>
#> 1 { h = step_drop_columns(step_spline… 26ms 26.1ms 38.3 38.2MB 2.39
check
step_check_spacing
# Benchmark: step_check_spacing + step_check_na on a column that contains a
# single NA (row 9). hrec1 checks `y`, hrec2 checks `x`.
# `rows` iterates over the top-level `n` as currently defined.
formula <- y ~ x
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = seq_len(rows))
    # Introduce one missing value in each column.
    dat[9, "x"] <- NA
    dat[9, "y"] <- NA
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_check_spacing(y) |>
        step_check_na(y) |>
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_check_spacing(x) |>
        step_check_na(x) |>
        prep() |>
        bake(),
      min_iterations = 10,
      relative = relative,
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100000
results
#> # A tibble: 2 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100000 1.89ms 1.93ms 510. 2.59MB 0
#> 2 hrec2 100000 2.32ms 2.36ms 422. 782.89KB 0