# Demo: summarise the range of `var` within `data`.
# `var` is handed to min()/max() without being embraced, so a bare column
# name such as `mpg` is looked up as an ordinary object instead of a column.
var_summary <- function(data, var) {
  summarise(
    data,
    min = min(var),
    max = max(var)
  )
}
mtcars |>
group_by(cyl) |>
var_summary(mpg)
Using the tidyverse in your package 🧹
Tidyverse
Demo - not part of package
Do this in a new file, e.g. scratch/tidyverse-testing.R, and add scratch to .Rbuildignore
data masking
Error: object mpg not found
# Summarise the smallest and largest value of one column.
#   data: a data frame (may already be grouped).
#   var:  bare column name; embraced with {{ }} so it is resolved in `data`.
var_summary <- function(data, var) {
  summarise(data, min = min({{ var }}), max = max({{ var }}))
}
mtcars |>
group_by(cyl) |>
var_summary(mpg)

# Count rows and average `var` for the mtcars rows with six or more
# cylinders, producing one output row per `cyl` value.
#   var: bare column name of mtcars; embraced so it is resolved in the data.
big_cars_summary <- function(var) {
  mtcars |>
    filter(.data$cyl >= 6) |>
    group_by(.data$cyl) |>
    summarise(
      n = n(),
      mean = mean({{ var }})
    )
}
big_cars_summary(disp)
Your turn solution
# Count rows and average the `height` column for each level of `group_var`.
#   data:      data frame containing a `height` column.
#   group_var: bare column name to group by.
height_sum <- function(data, group_var) {
  grouped <- dplyr::group_by(data, {{ group_var }})
  dplyr::summarise(
    grouped,
    n = dplyr::n(),
    mean_height = mean(.data$height)
  )
}
height_sum(starwars, hair_color)
Your turn solution
# Variant of height_sum() that groups by any number of columns.
#   data: data frame containing a `height` column.
#   ...:  grouping expressions, forwarded unchanged to dplyr::group_by().
height_sum <- function(data, ...) {
  by_group <- dplyr::group_by(data, ...)
  dplyr::summarise(by_group, n = dplyr::n(), mean_height = mean(.data$height))
}
height_sum(starwars, hair_color, eye_color)
Dynamic dots
# Summarise the minimum of `var`, naming the result after the column.
#   data: a data frame (may already be grouped).
#   var:  bare column name; used for the value and, through the
#         "{{var}}_min" := syntax, for the output column name.
var_summary <- function(data, var) {
  summarise(data, "{{var}}_min" := min({{ var }}))
}
mtcars |>
group_by(cyl) |>
var_summary(mpg)
Your turn solution
# Count rows and average `sum_var` for each level of `group_var`.
# The mean column is named dynamically as "mean_<sum_var>".
#   data:      a data frame.
#   group_var: bare column name to group by.
#   sum_var:   bare column name to average.
dynamic_sum <- function(data, group_var, sum_var) {
  per_group <- dplyr::group_by(data, {{ group_var }})
  dplyr::summarise(
    per_group,
    n = dplyr::n(),
    "mean_{{sum_var}}" := mean({{ sum_var }})
  )
}
dynamic_sum(starwars, hair_color, mass)
tidyselect
mtcars |>
dplyr::group_by(cyl) |>
dplyr::summarise(
dplyr::across(c("mpg", "disp"), mean)
)
cols <- c("mpg", "disp", "xyz")
mtcars |>
dplyr::group_by(cyl) |>
dplyr::summarise(
dplyr::across(dplyr::all_of(cols), mean)
)
cols <- c("mpg", "disp", "xyz")
mtcars |>
dplyr::group_by(cyl) |>
dplyr::summarise(
dplyr::across(dplyr::any_of(cols), mean)
)
Using a character vector:
# Average the columns named in `cols` for each value of `cyl`.
#   df:   data frame containing a `cyl` column.
#   cols: character vector of column names, selected with all_of().
summy <- function(df, cols) {
  df |>
    group_by(.data$cyl) |>
    summarise(
      across(all_of(cols), .fns = mean)
    )
}

summy(mtcars, c("mpg", "disp"))
Using bare names or tidy-select helpers:
# Average the selected columns for each value of `cyl`.
#   df:   data frame containing a `cyl` column.
#   cols: a tidy-select specification (bare names, starts_with(), where(),
#         ...); embraced so it is forwarded to across() unevaluated.
summy <- function(df, cols) {
  df |>
    group_by(.data$cyl) |>
    summarise(
      across({{ cols }}, .fns = mean)
    )
}

summy(mtcars, c(mpg, disp))
summy(mtcars, starts_with("mp"))
summy(mtcars, where(is.numeric))
Tidyversify libminer
use_package("dplyr")
We are using the new base pipe, so we need R >= 4.1
# First attempt: count the rows returned by lib() per group.
# `by` is passed to group_by() as written, so dplyr looks for a column
# literally called `by`; this version is kept as the failing starting point.
lib_summary <- function(by) {
  pkgs <- lib()
  dplyr::group_by(pkgs, by) |>
    dplyr::count()
}

load_all()
lib_summary(LibPath)
Errors - no column called ‘by’
2a - use curly-curly with bare names
# Count the rows returned by lib() for each value of `by`.
#   by: bare column name; embraced so group_by() resolves it in the data.
lib_summary <- function(by) {
  pkgs <- lib()
  dplyr::group_by(pkgs, {{ by }}) |>
    dplyr::count()
}

load_all()
lib_summary(LibPath)
2b - use .data[[x]] with character
use_import_from("rlang", ".data")

# Count the rows returned by lib() for each value of the column named `by`.
#   by: column name as a string; looked up with the .data[[ ]] pronoun.
lib_summary <- function(by) {
  pkgs <- lib()
  dplyr::group_by(pkgs, .data[[by]]) |>
    dplyr::count()
}

load_all()
lib_summary("License")
3 - add sizes
# Count packages and total their `size` for each value of `by`.
# `size` is referenced as a bare name inside summarise() in this step.
#   by: bare column name to group by.
lib_summary <- function(by) {
  with_sizes <- calculate_sizes(lib())
  with_sizes |>
    dplyr::group_by({{ by }}) |>
    dplyr::summarise(n = dplyr::n(), size = sum(size))
}
# Add a `size` column holding the summed file sizes under each package
# directory (LibPath/Package, searched recursively).
# `LibPath` and `Package` are referenced as bare column names in this step.
#   df: data frame with `LibPath` and `Package` columns.
calculate_sizes <- function(df) {
  df |>
    dplyr::mutate(
      size = purrr::map_dbl(
        fs::path(LibPath, Package),
        \(x) sum(fs::file_size(fs::dir_ls(x, recurse = TRUE)))
      )
    )
}

load_all()
lib_summary(LibPath)
check() # should throw notes about undefined globals
4 - add .data
# Count packages and total their `size` for each value of `by`.
#   by: bare column name to group by.
lib_summary <- function(by) {
  sized <- calculate_sizes(lib())
  by_group <- dplyr::group_by(sized, {{ by }})
  dplyr::summarise(
    by_group,
    n = dplyr::n(),
    size = sum(.data$size)
  )
}
# Add a `size` column holding the summed file sizes under each package
# directory (LibPath/Package, searched recursively).
#   df: data frame with `LibPath` and `Package` columns, accessed through
#       the .data pronoun.
calculate_sizes <- function(df) {
  df |>
    dplyr::mutate(
      size = purrr::map_dbl(
        fs::path(.data$LibPath, .data$Package),
        \(x) sum(fs::file_size(fs::dir_ls(x, recurse = TRUE)))
      )
    )
}

5 use … for multiple by’s
# Count packages and total their `size` per group.
#   ...: grouping expressions, forwarded unchanged to dplyr::group_by().
lib_summary <- function(...) {
  lib() |>
    calculate_sizes() |>
    dplyr::group_by(...) |>
    dplyr::summarise(
      n = dplyr::n(),
      size = sum(.data$size)
    )
}

Final - make sizes conditional again
Also, drop groups so we don’t get the message any more
# Count packages per group and, optionally, total their sizes.
#   ...:   grouping expressions, forwarded unchanged to dplyr::group_by().
#   sizes: single TRUE/FALSE; when TRUE a `size` column is computed and
#          summed per group.
# Returns an ungrouped summary (`.groups = "drop"`).
# Errors if `sizes` is not a single, non-missing logical value.
lib_summary <- function(..., sizes = FALSE) {
  # is.logical() alone also accepts NA and vectors whose length is not 1;
  # those would silently skip the size calculation because it is gated on
  # isTRUE(), so reject them up front.
  if (!is.logical(sizes) || length(sizes) != 1L || is.na(sizes)) {
    stop("'sizes' must be logical (TRUE/FALSE).", call. = FALSE)
  }
  lib() |>
    calculate_sizes(do_calc = sizes) |>
    dplyr::group_by(...) |>
    dplyr::summarise(
      n = dplyr::n(),
      # any_of() makes this a no-op when no `size` column is present.
      dplyr::across(dplyr::any_of("size"), sum),
      .groups = "drop"
    )
}
# Optionally add a `size` column with the summed file sizes under each
# package directory (LibPath/Package, searched recursively).
#   df:      data frame with `LibPath` and `Package` columns.
#   do_calc: sizes are computed only when this is exactly TRUE; for any
#            other value `df` is returned unchanged.
calculate_sizes <- function(df, do_calc) {
  if (isTRUE(do_calc)) {
    dplyr::mutate(
      df,
      size = purrr::map_dbl(
        fs::path(.data$LibPath, .data$Package),
        \(pkg_dir) sum(fs::file_size(fs::dir_ls(pkg_dir, recurse = TRUE)))
      )
    )
  } else {
    df
  }
}
# Return the result of utils::installed.packages() converted to a tibble.
lib <- function() {
  dplyr::as_tibble(utils::installed.packages())
}
lib_summary(LibPath, License)
lib_summary(LibPath, License, sizes = TRUE)
lib_summary(LibPath, License, sizes = 10)
test()
# update tests
check()
# update documentation
CLI
Final
# Count packages per group and, optionally, total their sizes.
#   ...:   grouping expressions, forwarded unchanged to dplyr::group_by().
#   sizes: single TRUE/FALSE; when TRUE a `size` column is computed and
#          summed per group.
# Returns an ungrouped summary (`.groups = "drop"`).
# Aborts with a cli-formatted error if `sizes` is not a single, non-missing
# logical value.
lib_summary <- function(..., sizes = FALSE) {
  # is.logical() alone also accepts NA and vectors whose length is not 1;
  # those would silently skip the size calculation because it is gated on
  # isTRUE(), so reject them up front.
  if (!is.logical(sizes) || length(sizes) != 1L || is.na(sizes)) {
    cli::cli_abort("You supplied {.val {sizes}} to {.var sizes}. It should be a {.cls logical} value, not {.obj_type_friendly {sizes}}.")
  }
  lib() |>
    calculate_sizes(do_calc = sizes) |>
    dplyr::group_by(...) |>
    dplyr::summarise(
      n = dplyr::n(),
      # any_of() makes this a no-op when no `size` column is present.
      dplyr::across(dplyr::any_of("size"), sum),
      .groups = "drop"
    )
}
# Optionally add a `size` column with the summed file sizes under each
# package directory (LibPath/Package, searched recursively).
#   df:      data frame with `LibPath` and `Package` columns.
#   do_calc: sizes are computed only when this is exactly TRUE; for any
#            other value `df` is returned unchanged.
calculate_sizes <- function(df, do_calc) {
  if (!isTRUE(do_calc)) return(df)
  # Emit an informational message before starting the size calculation.
  cli::cli_inform(c("i" = "Calculating sizes..."))
  df |>
    dplyr::mutate(
      size = purrr::map_dbl(
        fs::path(.data$LibPath, .data$Package),
        \(x) sum(fs::file_size(fs::dir_ls(x, recurse = TRUE)))
      )
    )
}

lib_summary(LibPath, License)
lib_summary(LibPath, License, sizes = TRUE)
lib_summary(LibPath, License, sizes = 10)
lib_summary(LibPath, License, sizes = "hello")
lib_summary(LibPath, License, sizes = NULL)
update tests
- snapshot for error
- groups