Benchmarking
benchmarking.Rmd
Timings hydrorecipes vs recipes
Timings for the hydrorecipes package are prefixed with an “h”. The first few comparisons include the R6 interface in hydrorecipes to check whether there is a loss of speed compared to the standard API. Most users are likely to use the standard API, so the remaining benchmarks present only that. Typical speed improvements are between 2x and 10x, and memory consumption is typically half that of the recipes package.
creating a recipe
# Settings shared with later chunks: whether bench::mark() reports relative
# values, and the row counts to benchmark.
relative <- TRUE
n <- c(1e2, 1e4, 5e6)
formula <- as.formula(y ~ x)

# Construct a bare recipe with the hydrorecipes R6 class (hrec1), the
# hydrorecipes standard API (hrec2) and recipes (rec).
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat),
      hrec2 = recipe(formula = formula, data = dat),
      rec = recipes::recipe(formula = formula, data = dat),
      check = FALSE,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1 100 1 1.00 5.80 532. 1
#> 2 hrec2 100 1.01 1 5.76 1 1.02
#> 3 rec 100 6.01 5.78 1 5187. 1.54
#> 4 hrec1 10000 1 1 5.70 1 2.02
#> 5 hrec2 10000 1.01 1.01 5.48 1 2.02
#> 6 rec 10000 5.92 5.70 1 4.34 1
#> 7 hrec1 5000000 1 1.00 5.69 1 2.02
#> 8 hrec2 5000000 1.00 1 5.75 1 2.02
#> 9 rec 5000000 5.93 5.72 1 4.34 1
add a step
# Build a recipe and register a single centering step, for each interface.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
        add_step(hydrorecipes:::StepCenter$new(x)),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_center(x),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_center(x),
      check = FALSE,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1 100 1 1 3.89 54.9 2.01
#> 2 hrec2 100 1.04 1.04 3.68 1 2.01
#> 3 rec 100 3.99 3.88 1 10.5 1
#> 4 hrec1 10000 1 1 3.85 1 1
#> 5 hrec2 10000 1.04 1.04 3.75 1 1.00
#> 6 rec 10000 4.00 3.87 1 1.32 1.01
#> 7 hrec1 5000000 1 1 3.87 1 2.02
#> 8 hrec2 5000000 1.04 1.04 3.73 1 2.02
#> 9 rec 5000000 4.02 3.86 1 1.32 1
step_center prep
# Time prep() only: the three recipes are built once per data size, outside
# bench::mark(), so recipe construction is not part of the measurement.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    hrec1 <- hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 <- recipe(formula = formula, data = dat) |>
      step_center(x)
    rec <- recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)
    # Expressions are left unnamed so the result labels stay the deparsed calls.
    bench::mark(
      hrec1$prep(),
      hrec2 |> prep(),
      rec |> recipes::prep(),
      check = FALSE,
      min_iterations = 1L,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1$prep() 100 1.00 1 80.8 53.8 1.00
#> 2 prep(hrec2) 100 1 1.00 82.7 1 1
#> 3 recipes::prep(rec) 100 85.2 83.9 1 1516. 1.05
#> 4 hrec1$prep() 10000 1.00 1 80.0 NaN 1.01
#> 5 prep(hrec2) 10000 1 1.00 79.2 NaN 1
#> 6 recipes::prep(rec) 10000 83.8 81.8 1 Inf 1.06
#> 7 hrec1$prep() 5000000 1.01 1 98.8 NaN NaN
#> 8 prep(hrec2) 5000000 1 1.00 98.9 NaN NaN
#> 9 recipes::prep(rec) 5000000 50.6 123. 1 Inf Inf
step_center prep and bake
# Time prep() followed by bake(): the recipes are built once per data size,
# outside bench::mark(), so recipe construction is not part of the measurement.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    hrec1 <- hydrorecipes:::Recipe$new(formula = formula, data = dat)$
      add_step(hydrorecipes:::StepCenter$new(x))
    hrec2 <- recipe(formula = formula, data = dat) |>
      step_center(x)
    rec <- recipes::recipe(formula = formula, data = dat) |>
      recipes::step_center(x)
    # Expressions are left unnamed so the result labels stay the deparsed calls.
    bench::mark(
      hrec1$prep()$bake(),
      hrec2 |> prep() |> bake(),
      rec |> recipes::prep() |> recipes::bake(new_data = NULL),
      check = FALSE,
      min_iterations = 1L,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1$prep()$bake() 1e2 1 1.00 71.5 47.6 1.00
#> 2 bake(prep(hrec2)) 1e2 1.00 1 73.2 1 1
#> 3 recipes::bake(recipes::prep(r… 1e2 77.0 74.4 1 45.6 1.06
#> 4 hrec1$prep()$bake() 1e4 1 1 74.6 1 1.00
#> 5 bake(prep(hrec2)) 1e4 1.01 1.01 74.1 1 1
#> 6 recipes::bake(recipes::prep(r… 1e4 77.5 75.1 1 3.44 1.07
#> 7 hrec1$prep()$bake() 5e6 1.00 1 65.0 1 NaN
#> 8 bake(prep(hrec2)) 5e6 1 1.00 64.9 1 NaN
#> 9 recipes::bake(recipes::prep(r… 5e6 47.6 47.2 1 3.00 Inf
step_center
# step_center end to end: hydrorecipes plate() versus recipes prep() + bake().
# Only the centered x column is extracted, and check = TRUE makes bench::mark()
# compare it between the two packages.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
        step_center(x) |>
        plate())[["x"]],
      rec = (recipes::recipe(formula = formula, data = dat) |>
        recipes::step_center(x) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL))[["x"]],
      check = TRUE,
      min_iterations = 1L,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.7 5.19 2.00
#> 2 rec 100 14.9 14.6 1 1 1
#> 3 hrec 10000 1 1 14.5 1 1
#> 4 rec 10000 14.7 14.5 1 3.29 1.06
#> 5 hrec 5000000 1 1 11.1 1 4.51
#> 6 rec 5000000 11.7 11.2 1 3.00 1
step_scale
# step_scale end to end; only the scaled x column is compared (check = TRUE).
# The z column is generated but is not part of the formula.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
        step_scale(x, fun = fsd, n_sd = 2L) |>
        plate())[["x"]],
      rec = (recipes::recipe(formula = formula, data = dat) |>
        recipes::step_scale(x, factor = 2L) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL))[["x"]],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.1 4.50 1.98
#> 2 rec 100 14.5 14.0 1 1 1
#> 3 hrec 10000 1 1 13.4 1 1
#> 4 rec 10000 13.6 13.3 1 2.82 1.05
#> 5 hrec 5000000 1 1 2.88 1 1
#> 6 rec 5000000 2.86 2.89 1 2.50 1.04
step_intercept
# step_intercept end to end; the generated "intercept" column is extracted
# from each result and compared (check = TRUE).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows)
    )
    bench::mark(
      hrec = (recipe(formula = formula, data = dat) |>
        step_intercept() |>
        plate("tbl"))[["intercept"]],
      rec = (recipes::recipe(formula = formula, data = dat) |>
        recipes::step_intercept() |>
        recipes::prep() |>
        recipes::bake(new_data = NULL))[["intercept"]],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.4 4.65 1.97
#> 2 rec 100 14.7 14.5 1 1 1
#> 3 hrec 10000 1 1 14.5 1 1
#> 4 rec 10000 14.5 14.4 1 1.40 1.05
#> 5 hrec 5000000 1 1 5.84 1 4.52
#> 6 rec 5000000 8.72 5.79 1 1.00 1
step_normalize
# step_normalize (hrec1) against the equivalent step_center + step_scale pair
# (hrec2) and recipes::step_normalize (rec). The hydrorecipes results are
# subset to the x, z, y columns before the check = TRUE comparison.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
        step_normalize(c(x, z, y)) |>
        plate("tbl"))[, c("x", "z", "y")],
      hrec2 = (recipe(formula = formula, data = dat) |>
        step_center(c(x, z, y)) |>
        step_scale(c(x, z, y)) |>
        plate("tbl"))[, c("x", "z", "y")],
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_normalize(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      relative = relative,
      min_iterations = 1L,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1 100 1 1 14.0 52.9 2.02
#> 2 hrec2 100 1.36 1.35 10.3 1 1
#> 3 rec 100 14.2 14.0 1 8.58 2.17
#> 4 hrec1 10000 1 1 12.1 1 2.02
#> 5 hrec2 10000 1.31 1.30 9.37 1.00 2.02
#> 6 rec 10000 12.1 12.3 1 1.41 1
#> 7 hrec1 5000000 1 1 1.85 1 1.85
#> 8 hrec2 5000000 1.03 1.00 1.85 1.00 1.39
#> 9 rec 5000000 1.86 2.36 1 1.33 1
step_drop_columns
# Drop column z: step_drop_columns versus recipes::step_rm.
# `formula` is not redefined in this chunk; the value assigned earlier in the
# document is reused.
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_drop_columns(z) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_rm(z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = TRUE,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.8 7.53 2.02
#> 2 rec 100 15.1 14.8 1 1 1
#> 3 hrec 10000 1 1 14.7 1 1
#> 4 rec 10000 15.0 14.8 1 57.2 1.05
#> 5 hrec 5000000 1 1 40.9 1 1
#> 6 rec 5000000 41.7 40.8 1 14563. 1.00
step_subset_na_omit
# Remove rows with missing x: step_subset_na_omit versus recipes::step_naomit.
# NA values are planted in all three columns, but only x is passed to the steps.
# Absolute timings are reported (relative = FALSE).
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = c(1e6, 1e7),
  {
    dat <- tibble(
      x = rnorm(rows),
      z = rnorm(rows),
      y = rnorm(rows)
    )
    dat[1:5, "x"] <- NA_real_
    dat[100:150, "z"] <- NA_real_
    dat[10000:15000, "y"] <- NA_real_

    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
        step_subset_na_omit(terms = x) |>
        prep() |>
        bake())$get_result("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_naomit(x) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      relative = FALSE,
      min_iterations = 1L,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 1000000
#> 2 10000000
results
#> # A tibble: 4 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1000000 6.1ms 6.46ms 139. 34.9MB 19.8
#> 2 rec 1000000 29.5ms 30.61ms 32.3 35MB 4.61
#> 3 hrec1 10000000 75.4ms 75.56ms 13.2 343.3MB 22.0
#> 4 rec 10000000 153.6ms 153.58ms 6.51 343.4MB 19.5
step_subset_rows
# Select 5e5 randomly chosen rows: step_subset_rows, recipes::step_slice and
# plain `[` indexing (base). check = TRUE compares the three results.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = c(1e6, 1e7),
  {
    dat <- tibble(
      x = rnorm(rows),
      z = rnorm(rows),
      y = rnorm(rows)
    )
    # Row numbers drawn without replacement from 1..rows.
    sub <- sample.int(rows, size = 5e5)

    bench::mark(
      hrec1 = (recipe(formula = formula, data = dat) |>
        step_subset_rows(row_numbers = sub) |>
        prep() |>
        bake())$get_result("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_slice(sub) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      base = dat[sub, ],
      relative = FALSE,
      min_iterations = 1L,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 1000000
#> 2 10000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1000000 4.6ms 5.5ms 161. 12MB 6.42
#> 2 rec 1000000 36.2ms 37.1ms 25.3 30.7MB 0
#> 3 base 1000000 9.38ms 13.8ms 80.3 19.1MB 2.11
#> 4 hrec1 10000000 9.45ms 12.5ms 81.6 11.4MB 2.09
#> 5 rec 10000000 94.72ms 95.2ms 10.4 64.9MB 2.09
#> 6 base 10000000 11.53ms 12.2ms 79.3 19.1MB 4.29
step_subset_sample
# Random sample of 10000 rows: step_subset_sample versus recipes::step_sample.
# Different rows are drawn by each package, so only the number of rows returned
# is compared (check = TRUE on nrow()).
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = c(1e6, 1e7),
  {
    dat <- data.frame(
      x = rnorm(rows),
      z = rnorm(rows),
      y = rnorm(rows)
    )
    bench::mark(
      # Named expression instead of an `h <- { ... }` assignment passed as an
      # argument, so the result row gets a readable label.
      hrec1 = nrow(
        (recipe(formula = formula, data = dat) |>
          step_subset_sample(size = 10000L) |>
          prep() |>
          bake())$get_result("tbl")
      ),
      rec = nrow(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_sample(size = 10000 / rows) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      relative = FALSE,
      min_iterations = 1L,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 1000000
#> 2 10000000
results
#> # A tibble: 4 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 h <- { hrec1 = bake(prep(s… 1e6 1.67ms 1.74ms 571. 851.53KB 2.03
#> 2 rec 1e6 25.63ms 26.18ms 38.0 8.45MB 2.11
#> 3 h <- { hrec1 = bake(prep(s… 1e7 1.99ms 2.05ms 473. 315.12KB 3.31
#> 4 rec 1e7 86.01ms 86.49ms 11.5 76.9MB 0
step_cross_correlation
# step_cross_correlation with up to 1000 lags; hydrorecipes only (no recipes
# counterpart is benchmarked here).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows)
    )
    bench::mark(
      # NOTE(review): `z` is selected here but `dat` only has columns x and y;
      # confirm whether it should be dropped from the selection.
      hrec1 = recipe(formula = formula, data = dat) |>
        step_cross_correlation(c(x, z, y), lag_max = 1000) |>
        plate("tbl"),
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.28ms 1.32ms 752. 550KB 2.02
#> 2 hrec1 10000 1.98ms 2.02ms 494. 18.1KB 4.10
#> 3 hrec1 5000000 524.51ms 524.51ms 1.91 18.1KB 0
# FFT-based cross-correlation from hydrorecipes versus stats::ccf() on two
# series of 5e5 values; check = TRUE compares the two numeric results.
x <- rnorm(5e5)
y <- rnorm(5e5)
lag_max <- 1000
results <- bench::mark(
  # Expressions are named rather than assigned with `<-` inside the call, so
  # the result rows are labelled and no variables are created as a side effect.
  fft_ccf = hydrorecipes:::convolve_correlation(x, y, lag_max),
  ccf_base = as.numeric(ccf(x, y, lag.max = lag_max, plot = FALSE)$acf),
  min_iterations = 1L,
  check = TRUE
)
results
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 fft_ccf <- hydrorecipes:::convol… 32.05ms 32.88ms 30.3 15.7KB 0
#> 2 ccf_base <- as.numeric(ccf(x, y,… 1.94s 1.94s 0.516 143.7MB 0
step_lag
# Lags 1 to 30 of x: step_lead_lag versus recipes::step_lag. Names are removed
# with unname() before the check = TRUE comparison.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = as.numeric(seq_len(rows)),
      z = rnorm(rows)
    )
    bench::mark(
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_lead_lag(x, lag = 1:30) |>
          plate("tbl")
      ),
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_lag(x, lag = 1:30) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1 100 1 1 8.28 2.34 1
#> 2 rec 100 8.36 8.28 1 1 1.04
#> 3 hrec1 10000 1 1 7.44 1 1.98
#> 4 rec 10000 7.44 7.41 1 2.58 1
#> 5 hrec1 5000000 1 1 3.03 1 1
#> 6 rec 5000000 3.60 3.03 1 2.55 1.32
step_distributed_lag
# step_distributed_lag with knots from log_lags(5, 86401); hydrorecipes only,
# absolute timings.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = c(5e5, 5e6, 1e7),
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_distributed_lag(x, knots = log_lags(5, 86401)) |>
        prep() |>
        bake(),
      check = FALSE,
      relative = FALSE,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 500000
#> 2 5000000
#> 3 10000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 500000 64.4ms 75.7ms 13.3 19.1MB 0
#> 2 hrec 5000000 518.1ms 518.1ms 1.93 155.9MB 0
#> 3 hrec 10000000 883.6ms 883.6ms 1.13 308.5MB 0
step_harmonic
# Harmonic (sin/cos) terms of x at three frequencies: step_harmonic in
# hydrorecipes versus recipes.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_value = 0.0
        ) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_val = 0.0,
          keep_original_cols = TRUE
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      # Results are not compared: the sin and cos terms come back in a
      # different order.
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.5 3.26 2.00
#> 2 rec 100 14.5 14.4 1 1 1
#> 3 hrec 10000 1 1 7.54 1 NaN
#> 4 rec 10000 7.52 7.59 1 3.55 Inf
#> 5 hrec 5000000 1 1 1.20 1 NaN
#> 6 rec 5000000 1.20 1.20 1 3.50 Inf
# rows <- 1e6
# dat <- data.frame(x = rnorm(rows),
# y = 1:rows,
# z = rnorm(rows))
# bench::mark(
#
# {hrec = recipe(formula = formula, data = dat) |>
# step_harmonic(x,
# frequency = c(1.0, 2.0, 3.0),
# cycle_size = 0.1,
# starting_value = 0.0,
# varying = "cycle_size") |>
# step_harmonic(x,
# frequency = c(1.0, 2.0, 3.0),
# cycle_size = 0.1,
# starting_value = 0.0) |>
# step_intercept() |>
# step_center(x) |>
# prep() |>
# bake()},
#
# {hrec$steps[[2]]$update_step("cycle_size", 0.2)
# hrec$bake()
# },
# check = FALSE
# )
step_pca
# PCA on twelve predictors (a to l). hrec1/hrec2 keep 10 and 5 components with
# the step's default centering/scaling; hrec3/hrec4 switch both off. rec1/rec2
# pass center = TRUE, scale. = TRUE through `options`; rec3/rec4 leave `options`
# at its default. Results are not compared (check = FALSE).
set.seed(1)
formula <- as.formula(x ~ a + b + c + d + e + f + g + h + i + j + k + l)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      a = rnorm(rows),
      b = rnorm(rows),
      c = rnorm(rows),
      d = rnorm(rows),
      e = rnorm(rows),
      f = rnorm(rows),
      g = rnorm(rows),
      h = rnorm(rows),
      i = rnorm(rows),
      j = rnorm(rows),
      k = rnorm(rows),
      l = rnorm(rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_pca(c(a, b, c, d, e, f, g, h, i, j, k, l), n_comp = 10L) |>
        plate(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_pca(c(a, b, c, d, e, f, g, h, i, j, k, l), n_comp = 5L) |>
        plate(),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_pca(
          c(a, b, c, d, e, f, g, h, i, j, k, l),
          n_comp = 10L,
          center = FALSE,
          scale = FALSE
        ) |>
        plate(),
      hrec4 = recipe(formula = formula, data = dat) |>
        step_pca(
          c(a, b, c, d, e, f, g, h, i, j, k, l),
          n_comp = 5L,
          center = FALSE,
          scale = FALSE
        ) |>
        plate(),
      rec1 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 10L,
          options = list(center = TRUE, scale. = TRUE)
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec2 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 5L,
          options = list(center = TRUE, scale. = TRUE)
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec3 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 10L
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      rec4 = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_pca(
          recipes::all_predictors(),
          num_comp = 5L
        ) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
print(results, n = 100)
#> # A tibble: 24 × 14
#> expression rows min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
#> 1 hrec1 100 1.03 1.03 14.7 15.3 1 323 1
#> 2 hrec2 100 1.03 1.03 14.6 1 2.03 316 2
#> 3 hrec3 100 1.01 1.00 15.0 1.20 2.03 327 2
#> 4 hrec4 100 1 1 15.0 1 2.07 318 2
#> 5 rec1 100 14.9 14.7 1.02 13.3 2.15 21 2
#> 6 rec2 100 14.5 14.6 1 6.04 2.20 20 2
#> 7 rec3 100 14.8 14.7 1.01 3.35 1.01 22 1
#> 8 rec4 100 14.7 14.6 1.01 2.99 2.13 21 2
#> 9 hrec1 10000 1.62 1.88 7.87 1.22 NaN 86 0
#> 10 hrec2 10000 1.60 1.57 8.95 1 Inf 94 1
#> 11 hrec3 10000 1.03 1.61 9.95 1.22 Inf 100 1
#> 12 hrec4 10000 1 1 12.8 1 NaN 139 0
#> 13 rec1 10000 14.1 13.9 1 6.02 Inf 10 1
#> 14 rec2 10000 13.9 13.8 1.08 5.69 Inf 11 1
#> 15 rec3 10000 11.8 11.7 1.27 2.24 Inf 13 1
#> 16 rec4 10000 11.5 11.5 1.29 1.92 Inf 13 1
#> 17 hrec1 5000000 1.81 1.81 4.96 1.22 1 1 1
#> 18 hrec2 5000000 1.84 1.84 4.88 1 1.97 1 2
#> 19 hrec3 5000000 1 1 8.99 1.22 1.81 1 1
#> 20 hrec4 5000000 1.05 1.05 8.56 1 1.73 1 1
#> 21 rec1 5000000 8.99 8.99 1 6.01 1.81 1 9
#> 22 rec2 5000000 8.40 8.40 1.07 5.68 1.73 1 8
#> 23 rec3 5000000 3.73 3.73 2.41 2.23 1.46 1 3
#> 24 rec4 5000000 3.59 3.59 2.50 1.90 1.01 1 2
#> # ℹ 5 more variables: total_time <bch:tm>, result <list>, memory <list>,
#> # time <list>, gc <list>
step_dummy
# Dummy-encode the factor y (built with qF() from integers 1 to 10). Columns
# 3 to 11 of each unnamed result are compared (check = TRUE).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = qF(sample(1:10, rows, replace = TRUE)),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_dummy(y) |>
          plate("tbl")
      )[, 3:11],
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_dummy(y, keep_original_cols = TRUE) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      )[, 3:11],
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 8.01 1 1
#> 2 rec 100 7.94 7.92 1 1.30 1.06
#> 3 hrec 10000 1 1 11.8 1 2.02
#> 4 rec 10000 11.6 11.6 1 17.6 1
#> 5 hrec 5000000 1 1 183. 1 9.13
#> 6 rec 5000000 223. 183. 1 12.9 1
step_find_interval
- recipes has no direct equivalent, so step_cut is used for the comparison
# Bin x at -0.1, 0 and 0.1: step_find_interval versus recipes::step_cut.
# Results are not compared (check = FALSE).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_find_interval(x, vec = c(-0.1, 0, 0.1)) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_cut(x, breaks = c(-0.1, 0, 0.1)) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.2 3.25 NaN
#> 2 rec 100 14.2 13.9 1 1 Inf
#> 3 hrec 10000 1 1 13.1 1 Inf
#> 4 rec 10000 13.1 13.0 1 3.69 NaN
#> 5 hrec 5000000 1 1 5.25 1 Inf
#> 6 rec 5000000 5.42 5.13 1 3.50 NaN
step_varying
# step_varying versus recipes::step_zv on data where x is constant
# (rep(1, rows)). check = TRUE compares the two results.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = seq_len(rows),
      z = rnorm(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_varying(c(x, y, z)) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_zv(x, y, z) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = TRUE,
      relative = relative,
      min_iterations = 1L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 14.7 6.28 NaN
#> 2 rec 100 15.0 14.8 1 1 Inf
#> 3 hrec 10000 1 1 15.5 1 1
#> 4 rec 10000 15.6 15.4 1 32.9 1.03
#> 5 hrec 5000000 1 1 73.4 1 NaN
#> 6 rec 5000000 74.7 74.0 1 34.1 Inf
step_kernel_filter
step_kernel_filter uses a Fast Fourier Transform (FFT) based convolution instead of an explicit sliding window. This should be much faster for large datasets, particularly when the kernel size is also large.
# Centered moving average of width 5001 on z: step_kernel_filter with a uniform
# kernel versus recipes::step_window(statistic = "mean"). Only the value in
# row 10000 is extracted from each result and compared (check = TRUE).
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = c(2e4, 2e5),
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = 1:rows,
      z = cumsum(rnorm(rows))
    )
    bench::mark(
      hrec = unname(
        (recipe(formula = formula, data = dat) |>
          step_kernel_filter(
            z,
            kernel = list(rep(1, 5001L) / 5001L),
            align = "center"
          ) |>
          plate("tbl"))[10000, "kernel_filter_z"]
      ),
      # Named like the other chunks; previously an unnamed `{ rec = ... }`
      # block, which produced a long deparsed label.
      rec = unname(
        (recipes::recipe(formula = formula, data = dat) |>
          recipes::step_window(z, size = 5001L, statistic = "mean") |>
          recipes::prep() |>
          recipes::bake(new_data = NULL))[10000, "z"]
      ),
      min_iterations = 1L,
      relative = relative,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 20000
#> 2 200000
results
#> # A tibble: 4 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 2e4 1 1 85.7 1.48 NaN
#> 2 { rec = recipes::bake(recipes… 2e4 86.3 85.9 1 1 NaN
#> 3 hrec 2e5 1 1 630. 1 Inf
#> 4 { rec = recipes::bake(recipes… 2e5 656. 645. 1 1.99 NaN
step_convolve_gamma
# step_convolve_gamma applied to z; hydrorecipes only, absolute timings.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = c(2e4, 2e6),
  {
    dat <- data.frame(
      x = rep(1, rows),
      y = seq_len(rows),
      z = cumsum(rnorm(rows))
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_convolve_gamma(z, amplitude = 1, k = 1, theta = 1) |>
        plate("tbl"),
      min_iterations = 1,
      relative = FALSE,
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 20000
#> 2 2000000
results
#> # A tibble: 2 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec 20000 1.79ms 1.86ms 537. 714.2KB 0
#> 2 hrec 2000000 27.86ms 28.49ms 34.9 15.3MB 0
multiple steps
step_harmonic dominates these results.
# Three chained steps on x: 20 lags, harmonic terms, then centering.
# Results are not compared (check = FALSE).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = seq_len(rows)
    )
    bench::mark(
      hrec = recipe(formula = formula, data = dat) |>
        step_lead_lag(x, lag = 1:20) |>
        step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_value = 0.0
        ) |>
        step_center(x) |>
        plate("tbl"),
      rec = recipes::recipe(formula = formula, data = dat) |>
        recipes::step_lag(x, lag = 1:20, keep_original_cols = TRUE) |>
        recipes::step_harmonic(
          x,
          frequency = c(1.0, 2.0, 3.0),
          cycle_size = 0.1,
          starting_val = 0.0,
          keep_original_cols = TRUE
        ) |>
        recipes::step_center(x) |>
        recipes::prep() |>
        recipes::bake(new_data = NULL),
      check = FALSE,
      relative = relative,
      min_iterations = 1
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 15.1 1 NaN
#> 2 rec 100 15.3 15.1 1 6.39 Inf
#> 3 hrec 10000 1 1 11.3 1 1
#> 4 rec 10000 11.4 11.4 1 2.70 1.02
#> 5 hrec 5000000 1 1 1.32 1 1
#> 6 rec 5000000 1.32 1.32 1 2.64 1.13
step_spline_b
# B-spline basis of x with 13 degrees of freedom: step_spline_b in
# hydrorecipes versus recipes. Names are dropped before the check = TRUE
# comparison.
formula <- as.formula(y ~ x)
n <- c(100, 1e4, 5e6)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_spline_b(x, df = 13) |>
          plate("tbl")
      ),
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_spline_b(
            x,
            deg_free = 13,
            keep_original_cols = TRUE
          ) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      relative = relative,
      min_iterations = 2
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 7.78 2.25 NaN
#> 2 rec 100 7.77 7.75 1 1 Inf
#> 3 hrec 10000 1 1 6.19 1 Inf
#> 4 rec 10000 6.26 6.16 1 4.24 NaN
#> 5 hrec 5000000 1 1 3.03 1 1.82
#> 6 rec 5000000 2.92 3.03 1 4.21 1
step_spline_n
# Natural spline basis of x with 11 degrees of freedom: step_spline_n versus
# recipes::step_spline_natural. Names are dropped before the check = TRUE
# comparison.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = seq_len(rows)
    )
    bench::mark(
      hrec = unname(
        recipe(formula = formula, data = dat) |>
          step_spline_n(x, df = 11L) |>
          plate("tbl")
      ),
      rec = unname(
        recipes::recipe(formula = formula, data = dat) |>
          recipes::step_spline_natural(
            x,
            deg_free = 11L,
            keep_original_cols = TRUE
          ) |>
          recipes::prep() |>
          recipes::bake(new_data = NULL)
      ),
      check = TRUE,
      relative = relative,
      min_iterations = 2L
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec 100 1 1 7.47 2.93 NaN
#> 2 rec 100 7.51 7.46 1 1 Inf
#> 3 hrec 10000 1 1 5.86 1 Inf
#> 4 rec 10000 5.93 5.92 1 4.03 NaN
#> 5 hrec 5000000 1 1 3.22 1 1
#> 6 rec 5000000 3.05 3.22 1 4.00 1.55
step_add_noise
# step_add_noise applied to y; hydrorecipes only, bench::mark() defaults.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_add_noise(y) |>
        plate("dt")
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.22ms 1.26ms 790. 544.7KB 0
#> 2 hrec1 10000 1.56ms 1.61ms 616. 160.5KB 2.07
#> 3 hrec1 5000000 189.19ms 189.31ms 5.28 76.3MB 0
step_aquifer_grf & step_aquifer_theis
The Theis solution is a subset of the grf solution.
# step_aquifer_grf (hrec1) versus step_aquifer_theis (hrec2) with the same
# time and flow-rate columns; check = TRUE compares the two results.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_aquifer_theis(time = x, flow_rate = y) |>
        plate("dt"),
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.35ms 1.4ms 714. 557.3KB 2.08
#> 2 hrec2 100 1.46ms 1.51ms 660. 531.9KB 2.19
#> 3 hrec1 10000 2.3ms 2.36ms 420. 160.6KB 0
#> 4 hrec2 10000 2.38ms 2.48ms 402. 83.6KB 0
#> 5 hrec1 5000000 665.94ms 665.94ms 1.50 76.3MB 1.50
#> 6 hrec2 5000000 637.58ms 637.58ms 1.57 38.2MB 0
step_aquifer_theis_aniso
# step_aquifer_theis_aniso with equal major and minor hydraulic conductivity
# (hrec1) versus step_aquifer_theis (hrec2). Names are dropped with unname()
# before the check = TRUE comparison.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(1:rows),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_theis_aniso(
            time = x,
            flow_rate = y,
            distance_x = 0,
            distance_y = 100,
            hydraulic_conductivity_major = 1e-4,
            hydraulic_conductivity_minor = 1e-4
          ) |>
          plate("dt")
      ),
      # The stray trailing comma after `flow_rate = y` (an empty argument) has
      # been removed.
      hrec2 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_theis(time = x, flow_rate = y) |>
          plate("dt")
      ),
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.47ms 1.52ms 654. 626.09KB 0
#> 2 hrec2 100 1.56ms 1.6ms 620. 7.98KB 2.07
#> 3 hrec1 10000 2.39ms 2.43ms 409. 162.39KB 2.08
#> 4 hrec2 10000 2.46ms 2.5ms 398. 85.33KB 0
#> 5 hrec1 5000000 632.39ms 632.39ms 1.58 76.3MB 0
#> 6 hrec2 5000000 630.01ms 630.01ms 1.59 38.15MB 0
step_aquifer_leaky
# step_aquifer_leaky with leakage = 100000000 (hrec1) versus
# step_aquifer_theis (hrec2). Names are dropped with unname() before the
# check = TRUE comparison.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_leaky(
            time = x,
            flow_rate = y,
            leakage = 100000000
          ) |>
          plate("dt")
      ),
      hrec2 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_theis(time = x, flow_rate = y) |>
          plate("dt")
      ),
      check = TRUE
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.49ms 1.53ms 652. 563.34KB 2.10
#> 2 hrec2 100 1.57ms 1.61ms 617. 7.98KB 2.11
#> 3 hrec1 10000 3.25ms 3.31ms 298. 396.84KB 0
#> 4 hrec2 10000 2.47ms 2.51ms 398. 85.33KB 0
#> 5 hrec1 5000000 1.33s 1.33s 0.749 190.74MB 0
#> 6 hrec2 5000000 655.34ms 655.34ms 1.53 38.15MB 1.53
step_aquifer_patch
# step_aquifer_grf (hrec1) versus step_aquifer_patch (hrec3) with identical
# inner and outer storage and conductivity values. Results are not compared
# (check = FALSE).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = c(1e4, 1e5, 1e6),
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = rep(0.01, rows)
    )
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_aquifer_grf(time = x, flow_rate = y) |>
        plate("dt"),
      hrec3 = recipe(formula = formula, data = dat) |>
        step_aquifer_patch(
          time = x,
          flow_rate = 0.01,
          thickness = 1.0,
          radius = 100.0,
          radius_patch = 200.0,
          specific_storage_inner = 1e-6,
          specific_storage_outer = 1e-6,
          hydraulic_conductivity_inner = 1e-4,
          hydraulic_conductivity_outer = 1e-4,
          n_stehfest = 8L
        ) |>
        plate("dt"),
      check = FALSE,
      relative = relative
    )
  }
)
#> Running with:
#> rows
#> 1 10000
#> 2 100000
#> 3 1000000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 hrec1 10000 1 1 52.6 1 NaN
#> 2 hrec3 10000 53.8 53.2 1 4.00 NaN
#> 3 hrec1 100000 1 1 97.3 1.99 NaN
#> 4 hrec3 100000 101. 98.2 1 1 NaN
#> 5 hrec1 1000000 1 1 77.8 2.00 NaN
#> 6 hrec3 1000000 81.2 77.3 1 1 NaN
step_aquifer_wellbore_storage
- currently this is slow for long series.
# step_aquifer_wellbore_storage (hrec1) versus step_aquifer_theis (hrec2).
# `formula` is not redefined in this chunk; the value assigned earlier in the
# document is reused. Results are not compared (check = FALSE).
results <- bench::press(
  rows = c(1e3, 1e4, 1e5),
  {
    dat <- data.frame(
      x = as.numeric(seq_len(rows)),
      y = as.numeric(seq_len(rows))
    )
    bench::mark(
      hrec1 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_wellbore_storage(
            time = x,
            flow_rate = 0.01,
            hydraulic_conductivity = 1e-4,
            specific_storage = 1e-6,
            radius = 100,
            radius_casing = 1e-15,
            radius_well = 1e-15,
            n_terms = 18
          ) |>
          plate("dt")
      ),
      hrec2 = unname(
        recipe(formula = formula, data = dat) |>
          step_aquifer_theis(time = x, flow_rate = y) |>
          plate("dt")
      ),
      check = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 1000
#> 2 10000
#> 3 100000
results
#> # A tibble: 6 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1000 8.59ms 8.64ms 116. 574.91KB 0
#> 2 hrec2 1000 1.68ms 1.74ms 564. 22.88KB 2.20
#> 3 hrec1 10000 59.42ms 59.71ms 16.7 162.36KB 0
#> 4 hrec2 10000 2.51ms 2.56ms 387. 163.5KB 2.22
#> 5 hrec1 100000 532.97ms 532.97ms 1.88 1.53MB 0
#> 6 hrec2 100000 10.08ms 10.33ms 96.4 1.53MB 0
step_vadose_weeks
# Time step_vadose_weeks over the series lengths stored in `n`.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    idx <- as.numeric(seq_len(rows))
    dat <- data.frame(x = idx, y = idx)
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_vadose_weeks(
          time = x,
          air_diffusivity = 0.8,
          thickness = 5,
          precision = 1e-12
        ) |>
        plate("dt"),
      check = FALSE,
      # Request at least two timed runs per size.
      min_iterations = 2
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.23ms 1.26ms 790. 548.6KB 2.06
#> 2 hrec1 10000 1.39ms 1.42ms 700. 160.5KB 0
#> 3 hrec1 5000000 173.68ms 173.89ms 5.75 76.3MB 0
step_transport_ogata_banks
# Time step_transport_ogata_banks on a grid of `rows` x values crossed with
# ten y values (so the input has 10 * rows rows).
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = n,
  {
    grid <- expand.grid(
      as.numeric(seq_len(rows)),
      as.numeric(1:10)
    )
    dat <- data.frame(grid)
    # First grid column is used as `time`, second as `distance`.
    names(dat) <- c("x", "y")
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_transport_ogata_banks(time = x, distance = y) |>
        plate("dt"),
      check = FALSE,
      min_iterations = 2
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 3 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.31ms 1.35ms 730. 562KB 2.09
#> 2 hrec1 10000 5.08ms 5.12ms 194. 786KB 0
#> 3 hrec1 5000000 1.57s 1.57s 0.635 381MB 0.635
step_transport_fractures_solute
# Time step_transport_fractures_solute on a fixed grid: six times
# (10^3 .. 10^8) crossed with eleven z values (0..10) and a single x of 0.
formula <- as.formula(~ time + z + x)
dat <- setDT(
  expand.grid(
    10^(3:8),
    seq(0, 10, by = 1),
    0
  )
)
names(dat) <- c("time", "z", "x")
results <- bench::mark(
  hrec1 = recipe(formula = formula, data = dat) |>
    step_transport_fractures_solute(
      time = time,
      distance_fracture = z,
      distance_matrix = x
    ) |>
    plate("dt"),
  check = FALSE,
  min_iterations = 2
)
results
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1.72ms 1.76ms 566. 586KB 0
step_transport_fractures_heat
# Time step_transport_fractures_heat on a fixed grid: six times
# (10^3 .. 10^8) crossed with 101 z values (0..100) and two x values.
formula <- as.formula(~ time + z + x)
dat <- setDT(
  expand.grid(
    10^(3:8),
    seq(0, 100, by = 1),
    c(0, 0.05)
  )
)
names(dat) <- c("time", "z", "x")
results <- bench::mark(
  hrec1 = recipe(formula = formula, data = dat) |>
    step_transport_fractures_heat(
      time = time,
      distance_fracture = z,
      distance_matrix = x
    ) |>
    plate("dt"),
  check = FALSE,
  min_iterations = 2
)
results
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 9.86ms 9.89ms 101. 594KB 0
step_fft_pgram, step_fft_welch
# Time two step_fft_pgram configurations (hrec1, hrec2) and step_fft_welch
# (hrec3) over the series lengths stored in `n`.
formula <- as.formula(y ~ x + z)
results <- bench::press(
  rows = n,
  {
    # Six random columns are generated; the formula references x, y and z only.
    dat <- data.frame(
      x = rnorm(rows),
      y = rnorm(rows),
      z = rnorm(rows),
      q = rnorm(rows),
      r = rnorm(rows),
      s = rnorm(rows)
    )
    bench::mark(
      # hrec1 and hrec2 differ only in the fifth positional argument
      # (FALSE vs TRUE).
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 3, TRUE, TRUE, FALSE, 0.1, time_step = 1) |>
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_pgram(c(x, y), 3, TRUE, TRUE, TRUE, 0.1, time_step = 1) |>
        prep() |>
        bake(),
      # Welch variant: subset length is one tenth of the series, with a
      # Nuttall window of the same length.
      hrec3 = recipe(formula = formula, data = dat) |>
        step_fft_welch(
          c(x, y),
          length_subset = nrow(dat) / 10,
          overlap = 0.60,
          window = window_nuttall(nrow(dat) / 10),
          time_step = 1
        ) |>
        prep() |>
        bake(),
      check = FALSE,
      min_iterations = 1
    )
  }
)
#> Running with:
#> rows
#> 1 100
#> 2 10000
#> 3 5000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100 1.68ms 1.93ms 516. 579.46KB 2.79
#> 2 hrec2 100 1.55ms 1.61ms 621. 17.76KB 0
#> 3 hrec3 100 1.38ms 1.42ms 702. 558.04KB 2.15
#> 4 hrec1 10000 2.72ms 2.81ms 356. 1.45MB 0
#> 5 hrec2 10000 2.39ms 2.52ms 379. 1.15MB 0
#> 6 hrec3 10000 2.62ms 2.69ms 371. 268.44KB 0
#> 7 hrec1 5000000 534.7ms 534.7ms 1.87 724.8MB 0
#> 8 hrec2 5000000 573.09ms 573.09ms 1.74 572.21MB 1.74
#> 9 hrec3 5000000 450.98ms 454.4ms 2.20 129.7MB 0
step_fft_transfer_welch and step_fft_transfer_pgram, step_fft_transfer_experimental
# Time the three transfer-function steps: periodogram (hrec1), Welch (hrec2)
# and experimental (hrec3), for three series lengths.
formula <- as.formula(y ~ x)
results <- bench::press(
  rows = c(1e5, 1e6, 1e7),
  {
    dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_pgram(c(x, y), 3, TRUE, TRUE, 0.1, time_step = 1) |>
        prep() |>
        bake(),
      # Subset length is one tenth of the series, with a Nuttall window of
      # the same length.
      hrec2 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_welch(
          c(x, y),
          length_subset = nrow(dat) / 10,
          overlap = 0.60,
          window = window_nuttall(nrow(dat) / 10),
          time_step = 1
        ) |>
        prep() |>
        bake(),
      # Named with `=` so the results row is labelled "hrec3"; `<-` here
      # would pass an unnamed assignment expression and the label would be
      # the deparsed call instead.
      hrec3 = recipe(formula = formula, data = dat) |>
        step_fft_transfer_experimental(
          c(x, y),
          spans = 3,
          taper = 0.1,
          n_groups = 300,
          time_step = 1
        ) |>
        prep() |>
        bake(),
      check = FALSE,
      min_iterations = 1
    )
  }
)
#> Running with:
#> rows
#> 1 100000
#> 2 1000000
#> 3 10000000
results
#> # A tibble: 9 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1e5 18.52ms 20.03ms 50.1 6.66MB 2.78
#> 2 hrec2 1e5 9.62ms 10.43ms 96.7 2.61MB 0
#> 3 hrec3 <- bake(prep(step_… 1e5 7.19ms 7.49ms 128. 2.48MB 0
#> 4 hrec1 1e6 190.28ms 204.38ms 4.97 61.04MB 0
#> 5 hrec2 1e6 86.24ms 87ms 11.5 20.6MB 2.86
#> 6 hrec3 <- bake(prep(step_… 1e6 58.59ms 60.28ms 16.7 19.09MB 0
#> 7 hrec1 1e7 2.06s 2.06s 0.486 610.35MB 0
#> 8 hrec2 1e7 1.03s 1.03s 0.973 206MB 0
#> 9 hrec3 <- bake(prep(step_… 1e7 903.11ms 903.11ms 1.11 190.75MB 0
step_ols
# Compare step_ols (with and without `do_response`) against stats::lm and
# stats::lm.fit on nine random columns, for three series lengths.
formula <- as.formula(y ~ .)
results <- bench::press(
  rows = c(1e5, 1e6, 1e7),
  {
    dat <- data.frame(
      y = rnorm(rows),
      x = rnorm(rows),
      z = rnorm(rows),
      a = rnorm(rows),
      b = rnorm(rows),
      d = rnorm(rows),
      e = rnorm(rows),
      f = rnorm(rows),
      g = rnorm(rows)
    )
    # Matrix version of `dat`, built outside the timed expressions, for
    # lm.fit.
    m <- qM(dat)
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_ols(formula = as.formula(y ~ .), do_response = FALSE) |>
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_ols(formula = as.formula(y ~ .), do_response = TRUE) |>
        prep() |>
        bake(),
      # No-intercept fit, matching the lm.fit call below.
      lm = lm(y ~ . - 1, dat),
      # Labelled explicitly so the results row reads "lm_fit" rather than a
      # truncated deparsed call. Column 1 is the response; the rest are
      # predictors.
      lm_fit = lm.fit(x = m[, 2:ncol(dat)], y = m[, 1]),
      check = FALSE,
      relative = FALSE
    )
  }
)
#> Running with:
#> rows
#> 1 100000
#> 2 1000000
#> 3 10000000
results
#> # A tibble: 12 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 1e5 6.71ms 6.86ms 131. 17.36MB 2.15
#> 2 hrec2 1e5 7.5ms 7.77ms 107. 19.13MB 0
#> 3 lm 1e5 30.53ms 31.31ms 28.5 47.75MB 2.38
#> 4 lm.fit(x = m[, c(2:ncol… 1e5 12.29ms 12.62ms 74.8 17.55MB 0
#> 5 hrec1 1e6 54.74ms 59.81ms 15.8 167.85MB 2.26
#> 6 hrec2 1e6 64.48ms 68.6ms 14.5 190.74MB 2.42
#> 7 lm 1e6 203.43ms 203.43ms 4.92 473.4MB 9.83
#> 8 lm.fit(x = m[, c(2:ncol… 1e6 137.59ms 142.62ms 7.06 175.48MB 2.35
#> 9 hrec1 1e7 681.79ms 681.79ms 1.47 1.64GB 0
#> 10 hrec2 1e7 930.26ms 930.26ms 1.07 1.86GB 1.07
#> 11 lm 1e7 2.47s 2.47s 0.405 4.67GB 1.21
#> 12 lm.fit(x = m[, c(2:ncol… 1e7 1.55s 1.55s 0.647 1.71GB 0.647
# formula <- as.formula(y~x+z)
#
#
# results <- bench::press(
# rows = n,
# {
# dat <- data.frame(x = rnorm(rows),
# y = rnorm(rows),
# z = rnorm(rows))
# bench::mark(
# hrec = recipe(formula = formula, data = dat) |>
# step_intercept() |>
# step_nls(formula = as.formula(y~.)) |>
# prep() |>
# bake(),
# check = FALSE,
# relative = FALSE
# )
# }
# )
step_nls
# Build a synthetic response `a` by convolving a de-meaned random walk `b`
# with a kernel from hydrorecipes:::gamma_3, then time recovery of the three
# kernel parameters with gsl_nls() directly (gsl_fun) and with
# step_convolve_gamma() + step_nls() at n_subset = 1, 10 and 100.
n0 <- 5e5
n <- 2e4  # not referenced in this chunk
n2 <- 1e4 # not referenced in this chunk
b <- cumsum(rnorm(n0))
b <- b - mean(b)
max_t <- 720 * ceiling(2.554)
a <- hydrorecipes:::convolve_overlap_save(
  x = b,
  y = hydrorecipes:::gamma_3(0:max_t, 0.816, 9.221, 2.554),
  0
)
dat <- data.frame(a = a, b = b)
formula <- formula(a ~ b)

# Model function for gsl_nls: convolve x with the kernel for parameters z
# and drop the first 7200 values, mirroring the a[-(1:7200)] response below.
f <- function(z, x) {
  max_t <- 720 * ceiling(z[3])
  hydrorecipes:::convolve_overlap_save(
    x = x,
    y = hydrorecipes:::gamma_3(0:max_t, z[1], z[2], z[3]),
    align = 0
  )[-(1:7200)]
}

results <- bench::mark(
  # Named with `=` so the results row is labelled "gsl_fun"; `<-` here would
  # pass an unnamed assignment and the label would be the deparsed call.
  gsl_fun = unname(round(coef(gsl_nls(
    fn = f,                                   ## model function
    y = a[-(1:7200)],                         ## response vector
    x = b,
    start = c(A = 0.5, n = 2.0, a = 2.0),     ## starting values
    lower = c(A = 0.01, n = 1.0, a = 1.0),
    upper = c(A = 1.0, n = 10.0, a = 10.0),
    control = gsl_nls_control(xtol = 1e-8),
    trace = FALSE,
    algorithm = "lm"                          ## algorithm
  )), 3)),
  # The three recipe variants below differ only in `n_subset`.
  h_1 = {
    h <- recipe(formula = formula, data = dat) |>
      step_convolve_gamma(
        b,
        amplitude = 0.5, k = 2.0, theta = 2.0,
        varying = list(
          name = c("amplitude", "k", "theta"),
          start = c(0.5, 2.0, 2.0),
          lower = c(0.01, 1.0, 1.0),
          upper = c(1.0, 10.0, 10.0)
        )
      ) |>
      step_nls(
        formula = formula(a ~ b), n_subset = 1L,
        trace = FALSE,
        algorithm = "lm",
        control = gsl_nls_control(xtol = 1e-8)
      )
    h$prep()$bake()
    unname(round(coef(h$steps[[3]]$fit), 3))
  },
  h_10 = {
    h <- recipe(formula = formula, data = dat) |>
      step_convolve_gamma(
        b,
        amplitude = 0.5, k = 2.0, theta = 2.0,
        varying = list(
          name = c("amplitude", "k", "theta"),
          start = c(0.5, 2.0, 2.0),
          lower = c(0.01, 1.0, 1.0),
          upper = c(1.0, 10.0, 10.0)
        )
      ) |>
      step_nls(
        formula = formula(a ~ b), n_subset = 10L,
        trace = FALSE,
        algorithm = "lm",
        control = gsl_nls_control(xtol = 1e-8)
      )
    h$prep()$bake()
    unname(round(coef(h$steps[[3]]$fit), 3))
  },
  h_100 = {
    h <- recipe(formula = formula, data = dat) |>
      step_convolve_gamma(
        b,
        amplitude = 0.5, k = 2.0, theta = 2.0,
        varying = list(
          name = c("amplitude", "k", "theta"),
          start = c(0.5, 2.0, 2.0),
          lower = c(0.01, 1.0, 1.0),
          upper = c(1.0, 10.0, 10.0)
        )
      ) |>
      step_nls(
        formula = formula(a ~ b), n_subset = 100L,
        trace = FALSE,
        algorithm = "lm",
        control = gsl_nls_control(xtol = 1e-8)
      )
    h$prep()$bake()
    unname(round(coef(h$steps[[3]]$fit), 3))
  },
  # Unlike the other chunks, the four results are compared for equality
  # (rounded, unnamed coefficients).
  check = TRUE
)
results
#> # A tibble: 4 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 gsl_fun <- unname(round(coef(g… 1.36s 1.36s 0.734 893MB 0
#> 2 h_1 1.69s 1.69s 0.591 900MB 0
#> 3 h_10 919.88ms 919.88ms 1.09 309MB 0
#> 4 h_100 775.05ms 775.05ms 1.29 250MB 0
step_ols_gap_fill
# Build a random-walk series with a level shift in the first half and a slow
# sinusoid, blank out rows 60000-70000, and time step_ols_gap_fill.
set.seed(123)
n <- 100000
frm <- formula(x ~ y + z)
x <- cumsum(rnorm(n))
dat <- data.table(x = x, y = x, z = as.numeric(1:n))
dat[, x := x + c(rep(20, n / 2), rep(0, n / 2))]
dat[, x := x + 3.0 * sin(z * 1 / n)]
# Copy of x taken before the gap is introduced; not used within this chunk.
tmp <- copy(dat$x)
# Set value to NA. These values will be estimated.
dat[60000:70000, x := NA_real_]
dat <- unclass(dat)
bench::mark(
  # Labelled so the results row reads "hrec1" rather than a truncated
  # deparse of the braced block.
  hrec1 = {
    # Recipe `h` is handed to step_ols_gap_fill through its `recipe` argument.
    h <- recipe(formula = frm, data = dat) |>
      step_find_interval(z, vec = c(0, n / 2, n)) |>
      step_intercept() |>
      step_spline_b(z, df = 4) |>
      step_drop_columns(z)
    hrec <- recipe(formula = frm, data = dat) |>
      step_ols_gap_fill(c(x, y, z), recipe = h) |>
      prep() |>
      bake()
  },
  check = FALSE
)
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:> <bch:> <dbl> <bch:byt> <dbl>
#> 1 { h = step_drop_columns(step_splin… 8.23ms 8.43ms 118. 9.4MB 0
check
step_check_spacing
# Time step_check_spacing followed by step_check_na on a column with a single
# NA (y, hrec1) and on a column with a run of NAs (x, hrec2).
formula <- as.formula(y ~ x)
results <- bench::press(
  # Written out explicitly: by this point `n` has been reassigned to 100000
  # by the step_ols_gap_fill chunk, so `rows = n` benchmarked this one size
  # only. Stating it here removes the dependence on that earlier global.
  rows = 1e5,
  {
    dat <- data.frame(
      x = rnorm(rows),
      y = 1:rows
    )
    dat[9:50, "x"] <- NA
    dat[9L, "y"] <- NA
    bench::mark(
      hrec1 = recipe(formula = formula, data = dat) |>
        step_check_spacing(y) |>
        step_check_na(y) |>
        prep() |>
        bake(),
      hrec2 = recipe(formula = formula, data = dat) |>
        step_check_spacing(x) |>
        step_check_na(x) |>
        prep() |>
        bake(),
      check = FALSE,
      relative = FALSE,
      min_iterations = 2
    )
  }
)
#> Running with:
#> rows
#> 1 100000
results
#> # A tibble: 2 × 7
#> expression rows min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 hrec1 100000 3.05ms 3.15ms 267. 3.37MB 2.17
#> 2 hrec2 100000 3.06ms 3.13ms 319. 2.29MB 0
sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] splines2_0.5.3 RcppRoll_0.3.1 tibble_3.2.1 bench_1.1.4
#> [5] hydrorecipes_0.0.6 Bessel_0.6-1 data.table_1.16.4 gslnls_1.4.1
#> [9] collapse_2.0.19
#>
#> loaded via a namespace (and not attached):
#> [1] xfun_0.50 bslib_0.8.0 htmlwidgets_1.6.4
#> [4] recipes_1.1.0 lattice_0.22-6 vctrs_0.6.5
#> [7] tools_4.4.2 generics_0.1.3 parallel_4.4.2
#> [10] pkgconfig_2.0.3 Matrix_1.7-1 desc_1.4.3
#> [13] lifecycle_1.0.4 compiler_4.4.2 textshaping_1.0.0
#> [16] codetools_0.2-20 RcppThread_2.2.0 htmltools_0.5.8.1
#> [19] class_7.3-22 sass_0.4.9 yaml_2.3.10
#> [22] gmp_0.7-5 profmem_0.6.0 prodlim_2024.06.25
#> [25] tidyr_1.3.1 pillar_1.10.1 pkgdown_2.1.1
#> [28] jquerylib_0.1.4 MASS_7.3-61 cachem_1.1.0
#> [31] gower_1.0.2 rpart_4.1.23 parallelly_1.41.0
#> [34] lava_1.8.1 tidyselect_1.2.1 digest_0.6.37
#> [37] earthtide_0.1.7 future_1.34.0 dplyr_1.1.4
#> [40] purrr_1.0.2 listenv_0.9.1 splines_4.4.2
#> [43] fastmap_1.2.0 grid_4.4.2 cli_3.6.3
#> [46] magrittr_2.0.3 utf8_1.2.4 survival_3.7-0
#> [49] future.apply_1.11.3 withr_3.0.2 Rmpfr_1.0-0
#> [52] lubridate_1.9.4 timechange_0.3.0 rmarkdown_2.29
#> [55] globals_0.16.3 nnet_7.3-19 timeDate_4041.110
#> [58] ragg_1.3.3 evaluate_1.0.3 knitr_1.49
#> [61] hardhat_1.4.0 rlang_1.1.5 Rcpp_1.0.14
#> [64] glue_1.8.0 ipred_0.9-15 jsonlite_1.8.9
#> [67] R6_2.5.1 systemfonts_1.2.1 fs_1.6.5