Skip to contents

Timings hydrorecipes vs recipes

Timings for the hydrorecipes package are prefixed with an “h” (e.g. `hrec`); timings for the recipes package are labelled `rec`. The first few comparisons also include the R6 interface of hydrorecipes to check whether it incurs a loss of speed compared to the standard API. Most users are likely to use the standard API, so the remaining benchmarks present only that. Typical speed improvements are between 2x and 10x, and memory consumption is typically half that of the recipes package.

creating a recipe

# Settings shared by the benchmarks in this document.
# `relative` is forwarded to bench::mark(); FALSE reports absolute timings.
relative <- FALSE
# Numbers of rows to benchmark with.
n <- c(1e2, 1e4, 5e6)
formula <- y ~ x

# Benchmark: cost of only constructing a recipe (no steps, no prep/bake).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      # hydrorecipes, R6 interface
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat),
      # unqualified recipe(): presumably the hydrorecipes standard API
      # (assumes hydrorecipes is attached earlier -- not shown here)
      hrec2 = recipe(formula = formula, data = dat),
      # recipes package
      rec = recipes::recipe(formula = formula, data = dat),
      # the three calls return different object types, so results are not
      # compared for equality
      check = FALSE,
      # use the shared setting, consistent with the other benchmarks
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 9 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100 643.63µs 665.39µs     1477.    1.87MB     6.19
#> 2 hrec2          100 642.95µs 664.15µs     1488.    3.66KB     6.95
#> 3 rec            100   2.91ms   2.99ms      327.   84.14MB     4.09
#> 4 hrec1        10000 640.57µs 661.19µs     1497.      544B     4.07
#> 5 hrec2        10000 643.59µs 661.88µs     1499.      544B     6.16
#> 6 rec          10000   2.91ms   2.97ms      336.   42.63KB     6.25
#> 7 hrec1      5000000 640.35µs 662.02µs     1497.      544B     6.48
#> 8 hrec2      5000000 640.65µs 657.96µs     1505.      544B     4.07
#> 9 rec        5000000   2.91ms   2.96ms      336.   19.08MB     6.23

add a step

# Benchmark: constructing a recipe and attaching a single centering step
# (the step is only added here, nothing is prepped or baked).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      # hydrorecipes, R6 interface
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
        add_step(hydrorecipes:::StepCenter$new(x)),
      # hydrorecipes, standard API
      hrec2 = recipe(formula = formula, data = dat) |>
        step_center(x),
      # recipes package
      rec = {
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_center(x)
      },
      check = FALSE,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 9 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100 868.55µs 892.43µs     1104.  531.23KB     6.18
#> 2 hrec2          100 891.83µs 925.42µs     1069.    6.57KB     6.30
#> 3 rec            100   3.03ms   3.12ms      318.  102.48KB     4.10
#> 4 hrec1        10000 868.75µs 901.76µs     1103.    1.06KB     6.18
#> 5 hrec2        10000 888.89µs 919.56µs     1082.    1.06KB     6.21
#> 6 rec          10000   3.03ms   3.12ms      320.   43.16KB     4.10
#> 7 hrec1      5000000 870.86µs 891.51µs     1114.    1.06KB     4.08
#> 8 hrec2      5000000 886.83µs 912.45µs     1089.    1.06KB     6.19
#> 9 rec        5000000   3.04ms   3.09ms      322.   19.08MB     6.35

step_center prep

# Benchmark: prep() only. The recipes are built once, outside bench::mark(),
# so that only the prep call itself is timed.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )

    # hydrorecipes, R6 interface
    hrec1 <- hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    # hydrorecipes, standard API
    hrec2 <- recipe(formula = formula, data = dat) |>
      step_center(x)
    # recipes package
    rec <- recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)

    # NOTE(review): each object is created once and prepped on every
    # iteration; if prep() caches state on the object, later iterations may be
    # cheaper than the first -- confirm.
    bench::mark(
      hrec1$prep(),
      hrec2 |> prep(),
      rec |> recipes::prep(),
      check = FALSE,
      min_iterations = 10,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 9 × 7
#>   expression            rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>           <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1$prep()           100 181.91µs 188.72µs   5238.     80.73KB     6.18
#> 2 prep(hrec2)            100 182.44µs  189.4µs   5173.      2.69KB     6.17
#> 3 recipes::prep(rec)     100  13.72ms  13.91ms     71.6     4.06MB     6.71
#> 4 hrec1$prep()         10000 190.77µs 198.63µs   4932.          0B     6.17
#> 5 prep(hrec2)          10000  193.3µs 200.07µs   4943.          0B     6.18
#> 6 recipes::prep(rec)   10000  13.82ms  14.04ms     70.7   224.78KB     6.63
#> 7 hrec1$prep()       5000000   4.99ms   5.08ms    196.          0B     0   
#> 8 prep(hrec2)        5000000   5.05ms   5.08ms    197.          0B     0   
#> 9 recipes::prep(rec) 5000000  101.8ms 106.78ms      6.47    95.4MB    10.4

step_center prep and bake

# Benchmark: prep() followed by bake(). The recipes are built once, outside
# bench::mark(), so that only prep + bake are timed.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )

    # hydrorecipes, R6 interface
    hrec1 <- hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    # hydrorecipes, standard API
    hrec2 <- recipe(formula = formula, data = dat) |>
      step_center(x)
    # recipes package
    rec <- recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)

    # NOTE(review): each object is created once and reused on every
    # iteration; confirm that repeated prep()/bake() calls on the same object
    # do the same work each time.
    bench::mark(
      hrec1$prep()$bake(),
      hrec2 |> prep() |> bake(),
      # new_data = NULL: bake the data the recipe was prepped with
      rec |> recipes::prep() |> recipes::bake(new_data = NULL),
      check = FALSE,
      min_iterations = 10,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 9 × 7
#>   expression                   rows     min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                  <dbl> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1$prep()$bake()           1e2 260.9µs 270.3µs   3605.    382.14KB     6.17
#> 2 bake(prep(hrec2))             1e2 260.7µs 271.7µs   3640.      3.56KB     6.17
#> 3 recipes::bake(recipes::pre…   1e2  14.4ms  14.7ms     67.6   161.56KB     4.22
#> 4 hrec1$prep()$bake()           1e4 280.3µs 291.8µs   3389.     78.17KB     4.13
#> 5 bake(prep(hrec2))             1e4 280.6µs 291.4µs   3353.     78.17KB     6.18
#> 6 recipes::bake(recipes::pre…   1e4  14.6ms  14.8ms     67.0   229.11KB     6.70
#> 7 hrec1$prep()$bake()           5e6  12.4ms  12.4ms     80.2    38.15MB    80.2 
#> 8 bake(prep(hrec2))             5e6  12.2ms  12.4ms     78.4    38.15MB    39.2 
#> 9 recipes::bake(recipes::pre…   5e6 100.7ms 101.7ms      9.84    95.4MB    44.3

step_center

formula <- y ~ x + z

# Benchmark: full workflow (create recipe, add step_center, prep and bake).
# Only the centered "x" column is extracted, so that the two packages'
# results can be compared for equality.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )

    bench::mark(
      # hydrorecipes: plate() is used in place of prep() + bake()
      hrec = (
        recipe(formula = formula, data = dat) |>
          step_center(x) |>
          plate()
      )[["x"]],
      # recipes
      rec = (
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_center(x) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[["x"]],
      check = TRUE,
      min_iterations = 10,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   1.26ms   1.31ms    759.      97.5KB     6.26
#> 2 rec            100  17.77ms  18.08ms     55.0     40.2KB     4.23
#> 3 hrec         10000   1.28ms   1.32ms    751.      80.9KB     6.22
#> 4 rec          10000     18ms  18.23ms     54.5    272.3KB     4.36
#> 5 hrec       5000000  10.08ms  13.68ms     74.4     38.1MB    47.8 
#> 6 rec        5000000  110.8ms  110.8ms      9.03   114.5MB    81.2

step_scale

formula <- y ~ x

# Benchmark: scaling the "x" column. Only the scaled column is extracted and
# check = TRUE asserts that both packages return the same values.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes; `fsd` is not defined in this document -- presumably
      # collapse::fsd (TODO confirm)
      hrec = (
        recipe(formula = formula, data = dat) |>
          step_scale(x, fun = fsd, n_sd = 2L) |>
          plate("tbl")
      )[["x"]],
      # recipes; `factor = 2L` is the counterpart of `n_sd = 2L` above
      rec = (
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_scale(x, factor = 2L) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[["x"]],
      check = TRUE,
      relative = relative,
      min_iterations = 5
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   1.27ms   1.31ms    756.     594.8KB     6.22
#> 2 rec            100  17.65ms  17.99ms     54.2    133.2KB     4.34
#> 3 hrec         10000   1.31ms   1.35ms    728.      79.2KB     6.20
#> 4 rec          10000  17.83ms  18.08ms     54.8    233.2KB     4.39
#> 5 hrec       5000000  20.82ms  24.55ms     41.6     38.1MB    17.8 
#> 6 rec        5000000 131.12ms 131.27ms      7.62    95.4MB    11.4

step_intercept

formula <- y ~ x

# Benchmark: adding an intercept column. Only the "intercept" column is
# extracted and compared between the two packages.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = (
        recipe(formula = formula, data = dat) |>
          step_intercept() |>
          plate("tbl")
      )[["intercept"]],
      # recipes
      rec = (
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_intercept() |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[["intercept"]],
      check = TRUE,
      relative = relative,
      min_iterations = 2
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   1.24ms   1.28ms     774.    527.7KB     6.23
#> 2 rec            100  16.64ms   16.9ms      58.9   116.3KB     4.36
#> 3 hrec         10000   1.24ms   1.29ms     764.     79.2KB     6.21
#> 4 rec          10000  16.79ms  17.04ms      58.3   112.1KB     6.73
#> 5 hrec       5000000  10.65ms  10.93ms      91.5    38.1MB    25.0 
#> 6 rec        5000000  48.75ms  50.15ms      20.0    38.2MB     8.55

step_normalize

formula <- y ~ x + z

# Benchmark: normalizing all three columns. hydrorecipes is timed twice: with
# the dedicated step_normalize() and with step_center() + step_scale().
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows)
    )

    bench::mark(
      # hydrorecipes, single normalize step. Columns are reordered to
      # x, z, y -- presumably to match the column order returned by recipes
      # so that check = TRUE can compare the results.
      hrec1 = (
        recipe(formula = formula, data = dat) |>
          step_normalize(c(x, y, z)) |>
          plate("tbl")
      )[, c("x", "z", "y")],

      # hydrorecipes, center followed by scale
      hrec2 = (
        recipe(formula = formula, data = dat) |>
          step_center(c(x, y, z)) |>
          step_scale(c(x, y, z)) |>
          plate("tbl")
      )[, c("x", "z", "y")],

      # recipes
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_normalize(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),

      relative = relative,
      min_iterations = 2,
      check = TRUE
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 9 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.34ms   1.38ms    719.    541.98KB     6.24
#> 2 hrec2          100    1.7ms   1.75ms    568.      6.56KB     6.26
#> 3 rec            100  18.23ms  18.61ms     52.9    88.22KB     4.24
#> 4 hrec1        10000   1.51ms   1.55ms    642.    235.58KB     6.23
#> 5 hrec2        10000   1.88ms   1.94ms    514.    470.62KB     4.11
#> 6 rec          10000  18.69ms     19ms     52.3   663.09KB     6.82
#> 7 hrec1      5000000  83.16ms  90.09ms     11.2   114.44MB     5.59
#> 8 hrec2      5000000  99.27ms 101.64ms      9.86  228.88MB     9.86
#> 9 rec        5000000 273.57ms 273.77ms      3.65  305.21MB     3.65

step_lag

formula <- y ~ x

# Benchmark: 30 lagged copies of "x" (lags 1 to 30).
# unname() drops the column names, so check = TRUE compares values only
# (presumably the two packages name the lagged columns differently).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_lead_lag(x, lag = 1:30) |>
          plate("tbl")
      ),
      # recipes
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_lag(x, lag = 1:30) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      relative = relative,
      min_iterations = 10
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   2.83ms    2.9ms    340.    799.77KB     6.29
#> 2 rec            100  22.44ms  22.88ms     43.7   281.48KB     4.37
#> 3 hrec1        10000   2.98ms   3.06ms    317.       2.3MB     4.12
#> 4 rec          10000  23.64ms  23.93ms     41.6      5.9MB     6.93
#> 5 hrec1      5000000 179.61ms 196.59ms      3.95    1.12GB     3.95
#> 6 rec        5000000 660.25ms 722.94ms      1.29    2.83GB     2.19

step_distributed_lag

formula <- y ~ x

# Benchmark: distributed lag step. Only hydrorecipes is timed here (there is
# no `rec` expression), and larger data sets are used than in the other
# sections.
results <- bench::press(
  rows = c(5e5, 5e6, 1e7),
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      # log_lags_arma() is not defined in this document; presumably it
      # generates the knot positions (TODO confirm the meaning of 5 and 86401)
      hrec = recipe(formula = formula, data = dat) |>
        step_distributed_lag(x, knots = log_lags_arma(5, 86401)) |>
        plate(),
      check = FALSE,
      relative = relative,
      min_iterations = 2
    )
  }
)
#> Running with:
#>       rows
#> 1   500000
#> 2  5000000
#> 3 10000000

results
#> # A tibble: 3 × 7
#>   expression     rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>    <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec         500000   65.8ms   69.7ms     14.4     19.1MB     0   
#> 2 hrec        5000000  450.2ms  450.2ms      2.22   155.9MB     2.22
#> 3 hrec       10000000  904.9ms  904.9ms      1.11   308.5MB     1.11

step_harmonic

formula <- y ~ x

# Benchmark: harmonic (sine/cosine) terms of "x" at three frequencies.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = recipe(formula = formula, data = dat) |>
        step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_value = 0.0
        ) |>
        plate("tbl"),
      # recipes (note the argument is `starting_val` in this package)
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_val = 0.0,
          keep_original_cols = TRUE
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),

      # The sin and cos terms are returned in a different order by the two
      # packages, so the results are not compared for equality.
      check = FALSE,
      relative = relative,
      min_iterations = 10
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   1.41ms   1.46ms    680.    565.88KB    6.22 
#> 2 rec            100  18.57ms  18.87ms     52.7   176.72KB    4.39 
#> 3 hrec         10000   2.67ms    2.7ms    369.    548.27KB    4.10 
#> 4 rec          10000  20.28ms   20.5ms     48.4     1.91MB    4.40 
#> 5 hrec       5000000 596.49ms 618.41ms      1.63  267.03MB    0.181
#> 6 rec        5000000  775.4ms 782.13ms      1.25  934.64MB    0.535

step_pca

set.seed(1)
formula <- x ~ a + b + c + d + e + f + g + h + i + j + k + l

# Benchmark: principal component analysis on 12 predictors.
# Variants: 10 or 5 components, each with and without explicit
# centering/scaling:
#   hrec1 / rec1: 10 components, centered and scaled
#   hrec2 / rec2:  5 components, centered and scaled
#   hrec3 / rec3: 10 components, center/scale off (hrec) or package
#                 defaults (rec)
#   hrec4 / rec4:  5 components, center/scale off (hrec) or package
#                 defaults (rec)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      a = rnorm(rows),
      b = rnorm(rows),
      c = rnorm(rows),
      d = rnorm(rows),
      e = rnorm(rows),
      f = rnorm(rows),
      g = rnorm(rows),
      h = rnorm(rows),
      i = rnorm(rows),
      j = rnorm(rows),
      k = rnorm(rows),
      l = rnorm(rows)
    )
    bench::mark(
      # --- hydrorecipes ---------------------------------------------------
      hrec1 = recipe(formula = formula, data = dat) |>
        step_pca(all_predictor(), n_comp = 10L) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_pca(all_predictor(), n_comp = 5L) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_pca(
          all_predictor(),
          n_comp = 10L,
          center = FALSE,
          scale = FALSE
        ) |>
        plate(),
      hrec4 = recipe(formula = formula, data = dat) |>
        step_pca(
          all_predictor(),
          n_comp = 5L,
          center = FALSE,
          scale = FALSE
        ) |>
        plate(),

      # --- recipes --------------------------------------------------------
      rec1 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 10L,
          options = list(center = TRUE, scale. = TRUE)
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec2 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 5L,
          options = list(center = TRUE, scale. = TRUE)
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec3 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 10L
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec4 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 5L
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),

      check = FALSE,
      relative = relative,
      min_iterations = 2
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000


print(results, n = 100)
#> # A tibble: 24 × 14
#>    expression    rows      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc
#>    <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>
#>  1 hrec1          100   1.63ms   1.68ms   594.     706.11KB    6.25    285     3
#>  2 hrec2          100   1.63ms   1.68ms   594.      34.89KB    4.11    289     2
#>  3 hrec3          100   1.56ms   1.61ms   621.      34.02KB    6.24    299     3
#>  4 hrec4          100   1.56ms    1.6ms   621.      24.95KB    6.23    299     3
#>  5 rec1           100  21.69ms  22.21ms    44.7    661.98KB    4.47     20     2
#>  6 rec2           100  21.14ms  21.49ms    46.5    277.84KB    6.97     20     3
#>  7 rec3           100  21.28ms  21.59ms    46.2     154.4KB    6.93     20     3
#>  8 rec4           100  20.77ms  21.08ms    47.3    136.89KB    4.30     22     2
#>  9 hrec1        10000   4.24ms   4.28ms   224.       3.37MB    2.03    110     1
#> 10 hrec2        10000   4.15ms    4.2ms   238.        2.6MB    4.17    114     2
#> 11 hrec3        10000   3.17ms   3.22ms   299.       2.45MB    2.03    147     1
#> 12 hrec4        10000    3.1ms   3.15ms   317.       1.69MB    2.03    156     1
#> 13 rec1         10000  41.54ms  41.82ms    23.8     21.17MB    4.76     10     2
#> 14 rec2         10000  40.52ms  41.01ms    24.3     20.02MB    2.03     12     1
#> 15 rec3         10000  34.57ms  34.99ms    28.4      7.89MB    4.73     12     2
#> 16 rec4         10000  33.78ms  34.03ms    29.3      6.74MB    4.51     13     2
#> 17 hrec1      5000000 782.28ms 807.83ms     1.24     1.64GB    0.619     2     1
#> 18 hrec2      5000000 730.48ms 767.09ms     1.30     1.27GB    0.652     2     1
#> 19 hrec3      5000000 391.58ms 409.78ms     2.44     1.19GB    2.44      2     2
#> 20 hrec4      5000000 318.48ms  361.1ms     2.77   839.24MB    0         2     0
#> 21 rec1       5000000    4.14s     4.2s     0.238    10.3GB    1.43      2    12
#> 22 rec2       5000000    4.01s    4.33s     0.231    9.74GB    2.08      2    18
#> 23 rec3       5000000    2.01s    2.07s     0.483    3.82GB    1.21      2     5
#> 24 rec4       5000000    1.69s    1.73s     0.577    3.26GB    1.44      2     5
#> # ℹ 5 more variables: total_time <bch:tm>, result <list>, memory <list>,
#> #   time <list>, gc <list>

step_dummy

formula <- y ~ x

# Benchmark: dummy (indicator) columns for a 10-level factor "y".
# unname() drops the column names, so check = TRUE compares values only.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      # qF() is not defined in this document -- presumably collapse::qF,
      # used here to turn the sampled integers into a factor (TODO confirm)
      y = qF(sample(1:10, rows, replace = TRUE)),
      z = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_dummy(y) |>
          plate("tbl")
      ),
      # recipes
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_dummy(y, keep_original_cols = TRUE) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      # use the shared setting instead of a hard-coded FALSE, consistent with
      # the other benchmarks
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100    2.8ms   2.88ms  346.      559.23KB    6.36 
#> 2 rec            100   21.2ms   21.6ms   46.3     487.89KB    4.41 
#> 3 hrec         10000    2.9ms   2.97ms  333.      353.05KB    4.11 
#> 4 rec          10000     38ms  38.45ms   25.9       6.09MB    4.70 
#> 5 hrec       5000000   45.8ms  48.57ms   19.2     235.66MB    3.83 
#> 6 rec        5000000    11.5s   11.48s    0.0871    2.98GB    0.261

step_find_interval

  • The recipes package has no direct equivalent of step_find_interval, so it is compared to recipes::step_cut instead.
formula <- y ~ x

# Benchmark: binning "x" at the break points -0.1, 0 and 0.1.
# hydrorecipes::step_find_interval is timed against recipes::step_cut; the
# two steps are different functions, so results are not compared
# (check = FALSE).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = recipe(formula = formula, data = dat) |>
        step_find_interval(x, vec = c(-0.1, 0, 0.1)) |>
        plate("tbl"),
      # recipes
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_cut(x, breaks = c(-0.1, 0, 0.1)) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   1.39ms   1.43ms    692.     562.7KB     6.99
#> 2 rec            100  18.38ms  18.77ms     52.5    174.2KB     0   
#> 3 hrec         10000   1.53ms   1.58ms    632.     157.5KB     0   
#> 4 rec          10000  18.68ms  19.36ms     51.1    585.1KB     2.13
#> 5 hrec       5000000  71.02ms  71.37ms     13.9     76.3MB     0   
#> 6 rec        5000000 377.75ms 383.97ms      2.60   267.1MB     0

step_varying

formula <- y ~ x + z

# Benchmark: removal of columns that do not vary. "x" is constant (all 1),
# "y" and "z" vary. hydrorecipes::step_varying is timed against
# recipes::step_zv, and check = TRUE asserts both return the same result.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = recipe(formula = formula, data = dat) |>
        step_varying(c(x, y, z)) |>
        plate("tbl"),
      # recipes
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_zv(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = TRUE,
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   1.26ms   1.29ms    768.    583.33KB     2.08
#> 2 rec            100  17.88ms   18.4ms     53.9    93.13KB     2.07
#> 3 hrec         10000   1.25ms   1.29ms    760.     40.17KB     2.06
#> 4 rec          10000  18.51ms  18.61ms     53.4     1.33MB     0   
#> 5 hrec       5000000   5.42ms    5.5ms    179.     19.07MB     0   
#> 6 rec        5000000 628.34ms 628.34ms      1.59   649.8MB     0

step_kernel_filter

formula <- y ~ x + z

# Benchmark: moving average over 5001 samples of a random walk "z".
# hydrorecipes applies a uniform kernel (5001 equal weights summing to 1);
# recipes uses step_window() with a mean statistic of the same size.
# Only a single value (row 10000) is extracted and compared with
# check = TRUE.
results <- bench::press(
  rows = c(2e4, 2e5),
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = seq_len(rows),
      z = cumsum(rnorm(rows))
    )
    bench::mark(
      # hydrorecipes
      hrec = unname(
        (
          recipe(formula = formula, data = dat) |>
            step_kernel_filter(
              z,
              kernel = list(rep(1, 5001) / 5001),
              align = "center"
            ) |>
            plate("tbl")
        )[10000, "kernel_filter_z"]
      ),
      # recipes: named `rec` so that the results table shows a short label
      # instead of the deparsed code
      rec = {
        baked <- recipes::recipe(formula = formula, data = dat) |>
          recipes::step_window(z, size = 5001, statistic = "mean") |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
        unname(baked[10000, "z"])
      },
      check = TRUE,
      # use the shared setting, consistent with the other benchmarks
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#>     rows
#> 1  20000
#> 2 200000

results
#> # A tibble: 4 × 7
#>   expression                 rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec                        2e4   3.59ms   3.65ms   273.        750KB     2.07
#> 2 { rec = recipes::bake(re…   2e4 300.12ms 300.44ms     3.33   508.16KB     0   
#> 3 hrec                        2e5   5.82ms   5.92ms   167.       1.56MB     0   
#> 4 { rec = recipes::bake(re…   2e5    3.64s    3.64s     0.275    3.11MB     0

multiple steps

formula <- y ~ x

# Benchmark: a recipe that chains three steps (lags, harmonics, centering).
# Results are not compared between the packages (check = FALSE).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = seq_len(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = recipe(formula = formula, data = dat) |>
        step_lead_lag(x, lag = 1:10) |>
        step_harmonic(
          x,
          frequency = c(1, 2, 3, 4, 5),
          cycle_size = 0.1,
          starting_value = 0
        ) |>
        step_center(x) |>
        plate("tbl"),
      # recipes
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_lag(x, lag = 1:10, keep_original_cols = TRUE) |>
        recipes::step_harmonic(
          x,
          frequency = c(1, 2, 3, 4, 5),
          cycle_size = 0.1,
          starting_val = 0,
          keep_original_cols = TRUE
        ) |>
        recipes::step_center(x) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   2.24ms   2.29ms   436.      26.89KB     2.08
#> 2 rec            100  33.93ms  34.48ms    28.8    191.67KB     2.21
#> 3 hrec         10000   3.33ms   3.39ms   295.       1.76MB     2.05
#> 4 rec          10000  36.27ms  36.51ms    27.2      5.13MB     0   
#> 5 hrec       5000000    2.03s    2.03s     0.493  877.39MB     0   
#> 6 rec        5000000    2.78s    2.78s     0.360    2.44GB     1.08

step_spline_b

formula <- y ~ x

# Benchmark: B-spline basis of "x" with 13 degrees of freedom.
# unname() drops the column names, so check = TRUE compares values only.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      # hydrorecipes
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_spline_b(x, df = 13) |>
          plate("tbl")
      ),
      # recipes (`deg_free` is the counterpart of `df` above)
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_spline_b(
            x,
            deg_free = 13,
            keep_original_cols = TRUE
          ) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      relative = relative,
      min_iterations = 10
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   2.95ms   3.02ms   330.     583.65KB    0    
#> 2 rec            100  21.59ms  21.91ms    45.4    365.84KB    2.16 
#> 3 hrec         10000   3.81ms    3.9ms   255.       1.07MB    2.08 
#> 4 rec          10000   23.9ms  24.26ms    40.6      4.55MB    2.14 
#> 5 hrec       5000000 530.38ms 590.65ms     1.72   534.06MB    0.687
#> 6 rec        5000000    1.62s    1.66s     0.575     2.2GB    1.21

step_aquifer_grf & step_aquifer_theis

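The Theis solution is a special case of the generalized radial flow (GRF) solution, so with the arguments used below the two steps are expected to return the same result (the benchmark verifies this with `check = TRUE`).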

formula <- y ~ x

# Benchmark: step_aquifer_grf against step_aquifer_theis (both from
# hydrorecipes; there is no recipes counterpart in this section).
# check = TRUE asserts that both steps return the same result for these
# arguments.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),   # passed as `time`
      y = rep(0.01, rows)       # passed as `flow_rate`
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_aquifer_theis(time = x, flow_rate = y) |>
        plate("dt"),
      check = TRUE,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.42ms    1.5ms    667.     555.2KB     0   
#> 2 hrec2          100    1.5ms   1.54ms    628.     530.4KB     3.57
#> 3 hrec1        10000   2.34ms   2.38ms    419.     160.2KB     0   
#> 4 hrec2        10000   2.43ms   2.48ms    402.      83.1KB     2.07
#> 5 hrec1      5000000 604.03ms 604.03ms      1.66    76.3MB     0   
#> 6 hrec2      5000000 596.65ms 596.65ms      1.68    38.2MB     0

step_aquifer_leaky

formula <- y ~ x

# Benchmark: step_aquifer_leaky against step_aquifer_theis (both from
# hydrorecipes). unname() drops the column names, so check = TRUE compares
# values only.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),   # passed as `time`
      y = rep(0.01, rows)       # passed as `flow_rate`
    )
    bench::mark(
      # NOTE(review): leakage = 1e8 is presumably chosen so that the leaky
      # solution coincides with the Theis solution, which check = TRUE then
      # asserts -- confirm.
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_leaky(
            time = x,
            flow_rate = y,
            leakage = 1e8
          ) |>
          plate("dt")
      ),
      hrec2 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_theis(time = x, flow_rate = y) |>
          plate("dt")
      ),
      check = TRUE,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.52ms   1.56ms   634.      615.1KB     2.07
#> 2 hrec2          100    1.6ms   1.64ms   606.       7.45KB     2.07
#> 3 hrec1        10000   3.27ms    3.3ms   298.     396.41KB     0   
#> 4 hrec2        10000   2.54ms   2.58ms   387.       84.8KB     0   
#> 5 hrec1      5000000    1.32s    1.32s     0.758  190.74MB     0   
#> 6 hrec2      5000000 615.82ms 615.82ms     1.62    38.15MB     0

step_aquifer_patch

formula <- y ~ x

# Benchmark: step_aquifer_grf against step_aquifer_patch (both from
# hydrorecipes), at a single data size. Results are not compared
# (check = FALSE).
results <- bench::press(
  rows = 1e5,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      # The inner and outer storage/conductivity values are identical here.
      # Note that flow_rate is given as a constant rather than as column `y`.
      hrec3 = recipe(formula = formula, data = dat) |>
        step_aquifer_patch(
          time = x,
          flow_rate = 0.01,
          thickness = 1.0,
          radius = 100.0,
          radius_patch = 200.0,
          specific_storage_inner = 1e-6,
          specific_storage_outer = 1e-6,
          hydraulic_conductivity_inner = 1e-4,
          hydraulic_conductivity_outer = 1e-4,
          n_stehfest = 8L
        ) |>
        plate("dt"),
      check = FALSE,
      relative = relative
    )
  }
)
#> Running with:
#>     rows
#> 1 100000


results
#> # A tibble: 2 × 7
#>   expression   rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>  <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1      100000   9.88ms   10.1ms     98.4     1.53MB        0
#> 2 hrec3      100000 953.04ms    953ms      1.05    1.31MB        0

step_vadose_weeks

# Benchmark step_vadose_weeks over the input sizes in `n`, with at least
# 10 iterations per size.
formula <- y ~ x

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = as.numeric(seq_len(rows))
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_vadose_weeks(
          time = x,
          air_diffusivity = 0.8,
          thickness = 5,
          precision = 1e-12
        ) |>
        plate("dt"),
      check = FALSE,
      min_iterations = 10
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 3 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.27ms   1.31ms    760.     545.5KB     2.07
#> 2 hrec1        10000   1.46ms   1.49ms    669.     160.1KB     0   
#> 3 hrec1      5000000 171.34ms 171.85ms      5.80    76.3MB     0

step_transport_ogata_banks

# Benchmark step_transport_ogata_banks. The input is the full grid of
# `rows` values (column x, used as time) crossed with 10 values
# (column y, used as distance), i.e. 10 * rows data rows.
formula <- y ~ x

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      expand.grid(as.numeric(seq_len(rows)), as.numeric(seq_len(10L)))
    )
    names(dat) <- c("x", "y")
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_transport_ogata_banks(time = x, distance = y) |>
        plate("dt"),
      check = FALSE,
      min_iterations = 10
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 3 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.35ms   1.39ms   718.        559KB     2.07
#> 2 hrec1        10000   4.65ms   4.72ms   209.        785KB     2.09
#> 3 hrec1      5000000    1.47s    1.47s     0.680     381MB     1.02

step_transport_fractures_solute

# Benchmark step_transport_fractures_solute on a small grid:
# 6 times (1e3 .. 1e8) x 11 fracture distances (0 .. 10) x 1 matrix distance.
formula <- ~ time + z + x

dat <- setDT(
  expand.grid(
    10^(3:8),
    seq(0, 10, by = 1),
    0
  )
)

names(dat) <- c("time", "z", "x")

results <- bench::mark(
  hrec1 = recipe(formula = formula, data = dat) |>
    step_transport_fractures_solute(
      time = time,
      distance_fracture = z,
      distance_matrix = x
    ) |>
    plate("dt"),
  check = FALSE,
  min_iterations = 10
)

results
#> # A tibble: 1 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1        1.78ms   1.84ms      538.     580KB        0

step_transport_fractures_heat

# Benchmark step_transport_fractures_heat on a grid of
# 6 times (1e3 .. 1e8) x 101 fracture distances (0 .. 100) x 2 matrix
# distances (0 and 0.05).
formula <- ~ time + z + x

dat <- setDT(
  expand.grid(
    10^(3:8),
    seq(0, 100, by = 1),
    c(0, 0.05)
  )
)

names(dat) <- c("time", "z", "x")

results <- bench::mark(
  hrec1 = recipe(formula = formula, data = dat) |>
    step_transport_fractures_heat(
      time = time,
      distance_fracture = z,
      distance_matrix = x
    ) |>
    plate("dt"),
  check = FALSE,
  min_iterations = 10
)

results
#> # A tibble: 1 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1        10.3ms   10.3ms      96.5     587KB        0

step_fft_pgram and step_fft_welch

# Benchmark the periodogram step with two settings of its fifth positional
# argument (hrec1: FALSE, hrec2: TRUE) and the Welch step (hrec3), which
# uses a subset length of one tenth of the series with a Nuttall window.
formula <- y ~ x + z

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows), y = rnorm(rows), z = rnorm(rows),
      q = rnorm(rows), r = rnorm(rows), s = rnorm(rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 3, TRUE, TRUE, FALSE, 0.1) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 3, TRUE, TRUE, TRUE, 0.1) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_fft_welch(
          c(x, y),
          length_subset = nrow(dat) / 10,
          overlap = 0.60,
          window = window_nuttall(nrow(dat) / 10)
        ) |>
        plate(),
      check = FALSE,
      min_iterations = 3
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 9 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.65ms    1.7ms    586.    566.41KB     2.76
#> 2 hrec2          100   1.68ms   1.72ms    574.     18.21KB     0   
#> 3 hrec3          100   1.48ms   1.54ms    647.     546.3KB     2.56
#> 4 hrec1        10000   2.39ms   2.43ms    410.      1.38MB     0   
#> 5 hrec2        10000   2.25ms   2.38ms    421.      1.07MB     0   
#> 6 hrec3        10000   2.71ms    2.8ms    356.    260.91KB     2.58
#> 7 hrec1      5000000 471.55ms 487.89ms      2.05  686.65MB     1.02
#> 8 hrec2      5000000 414.88ms 414.88ms      2.41  534.06MB     4.82
#> 9 hrec3      5000000 443.85ms 454.35ms      2.22  125.89MB     0

step_fft_transfer_pgram and step_fft_transfer_welch

# Benchmark the periodogram-based transfer step (hrec1) against the
# Welch-based transfer step (hrec2); the Welch variant uses a subset length
# of one tenth of the series with a Nuttall window.
formula <- y ~ x

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_pgram(c(x, y), 3, TRUE, TRUE, 0.1) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_welch(
          c(x, y),
          length_subset = nrow(dat) / 10,
          overlap = 0.60,
          window = window_nuttall(nrow(dat) / 10)
        ) |>
        plate("dt"),
      check = FALSE,
      min_iterations = 2
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 6 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1          100   1.65ms   1.72ms    572.     548.3KB     0   
#> 2 hrec2          100   1.54ms   1.58ms    631.     542.9KB     0   
#> 3 hrec1        10000   3.22ms   3.35ms    291.     471.7KB     0   
#> 4 hrec2        10000   2.88ms   2.94ms    340.     198.3KB     0   
#> 5 hrec1      5000000 949.08ms 949.08ms      1.05   229.1MB     1.05
#> 6 hrec2      5000000 489.63ms 494.47ms      2.02    95.4MB     0

step_ols

# Benchmark a recipe that centers x, scales z and then applies step_ols
# with the same formula that defines the recipe.
formula <- y ~ x + z

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_center(x) |>
        step_scale(z) |>
        step_ols(formula = formula) |>
        plate(),
      check = FALSE,
      relative = relative
    )
  }
)
#> Running with:
#>      rows
#> 1     100
#> 2   10000
#> 3 5000000

results
#> # A tibble: 3 × 7
#>   expression    rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec           100   2.68ms   2.77ms    356.    812.21KB     0   
#> 2 hrec         10000   4.63ms   4.66ms    214.      1.34MB     0   
#> 3 hrec       5000000 208.04ms  212.5ms      4.71  667.58MB     2.35

step_ols_gap_fill

set.seed(123)

# Section-local series length. Deliberately NOT named `n`: `n` holds the
# benchmark sizes defined at the top of this file and is passed as
# `rows = n` to the bench::press() calls, so reassigning it here would
# silently reduce every later benchmark to a single size of 100000 rows.
n_gap <- 100000
frm <- formula(x ~ y + z)

# Random-walk series; y keeps the clean walk, x gets a step change in the
# first half plus a slow sinusoidal trend in z.
x <- cumsum(rnorm(n_gap))
dat <- data.table(x = x, y = x, z = as.numeric(seq_len(n_gap)))
dat[, x := x + c(rep(20, n_gap / 2), rep(0, n_gap / 2))]
dat[, x := x + 3.0 * sin(z * 1 / n_gap)]

# Copy of the complete x series taken before the gap is introduced.
tmp <- copy(dat$x)

# Set value to NA.  These values will be estimated.
dat[60000:70000, x := NA_real_]

# Drop the data.table class attribute before handing the data to recipe().
dat <- unclass(dat)

# The timed expression builds the helper recipe `h` (interval indicators,
# intercept, B-spline of z, then z dropped) and uses it in
# step_ols_gap_fill().
bench::mark(
  {h = recipe(formula = frm, data = dat) |>
    step_find_interval(z, vec = c(0, n_gap / 2, n_gap)) |>
    step_intercept() |>
    step_spline_b(z, df = 4) |>
    step_drop_columns(z)

  hrec = recipe(formula = frm, data = dat) |>
    step_ols_gap_fill(c(x, y, z), recipe = h) |>
    plate()}
)
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#>  num [1:100000, 1:8] -0.56 -0.791 0.768 0.839 0.968 ...
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : NULL
#>   ..$ : chr [1:8] "y" "find_interval_z_1" "find_interval_z_2" "intercept" ...
#> NULL
#> # A tibble: 1 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                           <bch> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 { h = step_drop_columns(step_spline…  26ms 26.1ms      38.3    38.2MB     2.39

check

step_check_spacing

# Benchmark the spacing and NA checks on an integer sequence column (y,
# hrec1) and on a random numeric column (x, hrec2). Row 9 is set to NA in
# both columns before the checks are run.
formula <- y ~ x

results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    dat[9, "x"] <- NA
    dat[9, "y"] <- NA

    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_check_spacing(y) |>
        step_check_na(y) |>
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_check_spacing(x) |>
        step_check_na(x) |>
        prep() |>
        bake(),
      check = FALSE,
      relative = relative,
      min_iterations = 10
    )
  }
)
#> Running with:
#>     rows
#> 1 100000

results
#> # A tibble: 2 × 7
#>   expression   rows      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>  <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 hrec1      100000   1.89ms   1.93ms      510.    2.59MB        0
#> 2 hrec2      100000   2.32ms   2.36ms      422.  782.89KB        0