Using the tidyverse in your package 🧹

Tidyverse

Demo - not part of package

Do this in a new file, e.g. scratch/tidyverse-testing.R, and add scratch to .Rbuildignore

data masking

# Deliberately naive first attempt: `var` is used as-is inside summarise(),
# so it is NOT looked up as a column of `data`. Kept to demonstrate the
# failure that embracing with {{ }} fixes.
#
# data: a data frame (possibly grouped).
# var:  intended to be a bare column name.
var_summary <- function(data, var) {
  summarise(
    data,
    min = min(var),
    max = max(var)
  )
}

# Expected to fail: the un-embraced `var` makes R look for an object `mpg`.
var_summary(group_by(mtcars, cyl), mpg)

Error: object 'mpg' not found

# Minimum and maximum of one column, per group if `data` is grouped.
#
# data: a data frame (possibly grouped).
# var:  bare column name; embraced with {{ }} so it is resolved in `data`.
var_summary <- function(data, var) {
  summarise(
    data,
    min = min({{ var }}),
    max = max({{ var }})
  )
}

# Min/max of `mpg` for each value of `cyl`.
var_summary(group_by(mtcars, cyl), mpg)
# Row count and mean of one column of `mtcars`, restricted to cars with six
# or more cylinders and grouped by cylinder count.
#
# var: bare column name; embraced with {{ }} so it is resolved in the data.
# `.data$cyl` refers to the `cyl` column explicitly through the data pronoun.
big_cars_summary <- function(var) {
  six_plus <- filter(mtcars, .data$cyl >= 6)
  by_cyl <- group_by(six_plus, .data$cyl)
  summarise(
    by_cyl,
    n = n(),
    mean = mean({{ var }})
  )
}

# Count and mean `disp` for the cars with six or more cylinders.
big_cars_summary(disp)

Your turn solution

# Row count and mean of the `height` column for each level of one grouping
# column.
#
# data:      a data frame with a `height` column.
# group_var: bare column name to group by (embraced with {{ }}).
height_sum <- function(data, group_var) {
  grouped <- dplyr::group_by(data, {{ group_var }})
  dplyr::summarise(
    grouped,
    n = dplyr::n(),
    mean_height = mean(.data$height)
  )
}

# Count and mean height per hair colour.
height_sum(starwars, hair_color)

Your turn solution

# Row count and mean of the `height` column, grouped by any number of
# columns.
#
# data: a data frame with a `height` column.
# ...:  grouping columns, forwarded unchanged to dplyr::group_by().
height_sum <- function(data, ...) {
  grouped <- dplyr::group_by(data, ...)
  dplyr::summarise(
    grouped,
    n = dplyr::n(),
    mean_height = mean(.data$height)
  )
}

# Two grouping columns passed through `...`.
height_sum(starwars, hair_color, eye_color)

Dynamic dots

# Minimum of one column, with the output column named after the input
# column (e.g. `mpg` -> `mpg_min`).
#
# data: a data frame (possibly grouped).
# var:  bare column name; embraced both in the glue-style name on the left of
#       `:=` and in the expression on the right.
var_summary <- function(data, var) {
  summarise(data, "{{var}}_min" := min({{ var }}))
}

# Per-`cyl` minimum of `mpg`; the result column is expected to be `mpg_min`.
var_summary(group_by(mtcars, cyl), mpg)

Your turn solution

# Row count and mean of a chosen column for each level of a grouping column.
# The mean column is named dynamically: `mean_<sum_var>`.
#
# data:      a data frame.
# group_var: bare column name to group by (embraced).
# sum_var:   bare column name to average (embraced, also used in the name).
dynamic_sum <- function(data, group_var, sum_var) {
  grouped <- dplyr::group_by(data, {{ group_var }})
  dplyr::summarise(
    grouped,
    n = dplyr::n(),
    "mean_{{sum_var}}" := mean({{ sum_var }})
  )
}

# Group by hair colour and average `mass`; the mean column is named from
# `sum_var`, so it is expected to be `mean_mass`.
dynamic_sum(starwars, hair_color, mass)

tidyselect

# Mean of two columns, selected by a literal character vector, per `cyl`.
dplyr::summarise(
  dplyr::group_by(mtcars, cyl),
  dplyr::across(c("mpg", "disp"), mean)
)

# all_of() is the strict selector: every name in `cols` must exist, so the
# non-existent "xyz" is expected to make this call fail.
cols <- c("mpg", "disp", "xyz")
dplyr::summarise(
  dplyr::group_by(mtcars, cyl),
  dplyr::across(dplyr::all_of(cols), mean)
)

# any_of() is the lenient selector: names in `cols` that are not columns
# (here "xyz") are expected to be skipped rather than raising an error.
cols <- c("mpg", "disp", "xyz")
dplyr::summarise(
  dplyr::group_by(mtcars, cyl),
  dplyr::across(dplyr::any_of(cols), mean)
)

Using a character vector:

# Per-`cyl` mean of the columns named in a character vector.
#
# df:   a data frame with a `cyl` column.
# cols: character vector of column names; wrapped in all_of() so it is
#       treated as an external vector of names rather than a column.
summy <- function(df, cols) {
  by_cyl <- group_by(df, .data$cyl)
  summarise(
    by_cyl,
    across(all_of(cols), .fns = mean)
  )
}
# Columns given as strings.
summy(mtcars, c("mpg", "disp"))

Using bare names or tidy-select helpers:

# Per-`cyl` mean of a tidy-select specification of columns.
#
# df:   a data frame with a `cyl` column.
# cols: bare names or a tidy-select helper; embraced with {{ }} so the
#       caller's selection is passed straight through to across().
summy <- function(df, cols) {
  by_cyl <- group_by(df, .data$cyl)
  summarise(
    by_cyl,
    across({{ cols }}, .fns = mean)
  )
}
# Three ways to specify `cols`: bare names, a name-matching helper, and a
# predicate on column type.
summy(mtcars, c(mpg, disp))
summy(mtcars, starts_with("mp"))
summy(mtcars, where(is.numeric))

Tidyversify libminer

# Declare dplyr as a dependency of the package (usethis helper; presumably
# records it in DESCRIPTION -- confirm the field it is added to).
use_package("dplyr")

We are using the new base pipe, need R >= 4.1

# Step 1 (deliberately broken): `by` is passed to group_by() without being
# embraced, so the caller's column name is not forwarded as intended. Kept
# to demonstrate the failure.
#
# by: intended to be a bare column name of the table returned by lib().
lib_summary <- function(by) {
  dplyr::count(dplyr::group_by(lib(), by))
}
# Reload the package code, then try grouping by a bare column name.
# This call is expected to error with the un-embraced definition.
load_all()
lib_summary(LibPath)

Errors - no column called ‘by’

2a - use curly-curly with bare names

# Count installed packages per level of one column.
#
# by: bare column name of the table returned by lib(); embraced with {{ }}
#     so it is resolved inside that table.
lib_summary <- function(by) {
  dplyr::count(dplyr::group_by(lib(), {{ by }}))
}
# Reload the package code and group by a bare column name.
load_all()
lib_summary(LibPath)

2b - use .data[[x]] with character

# Make rlang's `.data` pronoun available inside the package (usethis helper;
# presumably adds the corresponding import declaration -- confirm).
use_import_from("rlang", ".data")
# Count installed packages per level of one column, with the column given
# as a string.
#
# by: length-one character vector naming a column of the table returned by
#     lib(); looked up through the `.data` pronoun.
lib_summary <- function(by) {
  dplyr::count(dplyr::group_by(lib(), .data[[by]]))
}
# Reload the package code and group by a column name given as a string.
load_all()
lib_summary("License")

3 - add sizes

# Step 3: package count and total size per level of one column.
#
# by: bare column name of the table returned by lib() (embraced).
#
# `size` is referenced as a bare name here on purpose; this is the version
# that makes check() report undefined globals.
lib_summary <- function(by) {
  lib() |>
    calculate_sizes() |>
    dplyr::group_by({{ by }}) |>
    dplyr::summarise(
      n = dplyr::n(),
      size = sum(size)
    )
}

# Add a `size` column: for each row, the summed size of every file found
# recursively under the directory <LibPath>/<Package>.
#
# df: a data frame with `LibPath` and `Package` columns.
#
# The columns are referenced as bare names here on purpose; this is the
# version that makes check() report undefined globals.
calculate_sizes <- function(df) {
  dplyr::mutate(
    df,
    size = purrr::map_dbl(
      fs::path(LibPath, Package),
      function(pkg_dir) {
        pkg_files <- fs::dir_ls(pkg_dir, recurse = TRUE)
        sum(fs::file_size(pkg_files))
      }
    )
  )
}
# Reload, try the size-aware summary, then run the package checks.
load_all()
lib_summary(LibPath)
# The bare column names used in the step-3 definitions are what check()
# is expected to flag.
check() # should throw notes about undefined globals

4 - add .data

# Step 4: package count and total size per level of one column, with the
# `size` column referenced through the `.data` pronoun.
#
# by: bare column name of the table returned by lib() (embraced).
lib_summary <- function(by) {
  lib() |>
    calculate_sizes() |>
    dplyr::group_by({{ by }}) |>
    dplyr::summarise(
      n = dplyr::n(),
      size = sum(.data$size)
    )
}

# Add a `size` column: for each row, the summed size of every file found
# recursively under the directory <LibPath>/<Package>.
#
# df: a data frame with `LibPath` and `Package` columns (read through the
#     `.data` pronoun).
calculate_sizes <- function(df) {
  dplyr::mutate(
    df,
    size = purrr::map_dbl(
      fs::path(.data$LibPath, .data$Package),
      function(pkg_dir) {
        pkg_files <- fs::dir_ls(pkg_dir, recurse = TRUE)
        sum(fs::file_size(pkg_files))
      }
    )
  )
}

5 - use ... for multiple by's

# Step 5: package count and total size, grouped by any number of columns.
#
# ...: grouping columns of the table returned by lib(), forwarded unchanged
#      to dplyr::group_by().
lib_summary <- function(...) {
  sized <- calculate_sizes(lib())
  grouped <- dplyr::group_by(sized, ...)
  dplyr::summarise(
    grouped,
    n = dplyr::n(),
    size = sum(.data$size)
  )
}

Final - make sizes conditional again

Also, drop groups so we don’t get the message any more

# Summarise the installed packages: number of packages and, optionally,
# total on-disk size, grouped by any number of columns.
#
# ...:   grouping columns of the table returned by lib(), forwarded
#        unchanged to dplyr::group_by().
# sizes: single TRUE/FALSE; when TRUE, package sizes are calculated and
#        summed per group.
#
# Returns an ungrouped summary (`.groups = "drop"`) with column `n` and,
# only when sizes were calculated, `size`.
# Raises an error if `sizes` is not a single, non-missing logical value.
lib_summary <- function(..., sizes = FALSE) {
  # is.logical() alone also accepts NA and logical vectors of any length,
  # which isTRUE() downstream would silently treat as FALSE. Require a
  # genuine TRUE/FALSE flag so bad input is reported instead of ignored.
  is_flag <- is.logical(sizes) && length(sizes) == 1L && !is.na(sizes)
  if (!is_flag) {
    stop("'sizes' must be logical (TRUE/FALSE).", call. = FALSE)
  }

  lib() |>
    calculate_sizes(do_calc = sizes) |>
    dplyr::group_by(...) |>
    dplyr::summarise(
      n = dplyr::n(),
      # any_of() selects `size` only if it exists, so the same summarise()
      # works whether or not sizes were calculated.
      dplyr::across(dplyr::any_of("size"), sum),
      .groups = "drop"
    )
}

# Optionally add a `size` column: for each row, the summed size of every
# file found recursively under the directory <LibPath>/<Package>.
#
# df:      a data frame with `LibPath` and `Package` columns.
# do_calc: sizes are calculated only when this is exactly TRUE; for any
#          other value `df` is returned unchanged.
calculate_sizes <- function(df, do_calc) {
  if (!isTRUE(do_calc)) {
    return(df)
  }

  dplyr::mutate(
    df,
    size = purrr::map_dbl(
      fs::path(.data$LibPath, .data$Package),
      function(pkg_dir) {
        pkg_files <- fs::dir_ls(pkg_dir, recurse = TRUE)
        sum(fs::file_size(pkg_files))
      }
    )
  )
}

# Installed packages as a tibble (converted from the result of
# utils::installed.packages()).
lib <- function() {
  dplyr::as_tibble(utils::installed.packages())
}

# Default: counts only, no size calculation.
lib_summary(LibPath, License)
# With sizes calculated and summed per group.
lib_summary(LibPath, License, sizes = TRUE)
# Non-logical `sizes`: expected to hit the argument check and error.
lib_summary(LibPath, License, sizes = 10)
# Run the test suite; existing tests need updating for the new behaviour.
test()
# update tests
check()
# update documentation

CLI

Final

# Summarise the installed packages: number of packages and, optionally,
# total on-disk size, grouped by any number of columns.
#
# ...:   grouping columns of the table returned by lib(), forwarded
#        unchanged to dplyr::group_by().
# sizes: single TRUE/FALSE; when TRUE, package sizes are calculated and
#        summed per group.
#
# Returns an ungrouped summary (`.groups = "drop"`) with column `n` and,
# only when sizes were calculated, `size`.
# Raises a cli-formatted error if `sizes` is not a single, non-missing
# logical value.
lib_summary <- function(..., sizes = FALSE) {
  # is.logical() alone also accepts NA and logical vectors of any length,
  # which isTRUE() downstream would silently treat as FALSE. Require a
  # genuine TRUE/FALSE flag so bad input is reported instead of ignored.
  is_flag <- is.logical(sizes) && length(sizes) == 1L && !is.na(sizes)
  if (!is_flag) {
    cli::cli_abort("You supplied {.val {sizes}} to {.var sizes}. It should be a {.cls logical} value, not {.obj_type_friendly {sizes}}.")
  }

  lib() |>
    calculate_sizes(do_calc = sizes) |>
    dplyr::group_by(...) |>
    dplyr::summarise(
      n = dplyr::n(),
      # any_of() selects `size` only if it exists, so the same summarise()
      # works whether or not sizes were calculated.
      dplyr::across(dplyr::any_of("size"), sum),
      .groups = "drop"
    )
}

# Optionally add a `size` column: for each row, the summed size of every
# file found recursively under the directory <LibPath>/<Package>.
# Emits an informational cli message before the calculation starts.
#
# df:      a data frame with `LibPath` and `Package` columns.
# do_calc: sizes are calculated only when this is exactly TRUE; for any
#          other value `df` is returned unchanged and nothing is printed.
calculate_sizes <- function(df, do_calc) {
  if (!isTRUE(do_calc)) {
    return(df)
  }

  cli::cli_inform(c("i" = "Calculating sizes..."))

  dplyr::mutate(
    df,
    size = purrr::map_dbl(
      fs::path(.data$LibPath, .data$Package),
      function(pkg_dir) {
        pkg_files <- fs::dir_ls(pkg_dir, recurse = TRUE)
        sum(fs::file_size(pkg_files))
      }
    )
  )
}
# Default: counts only, no size calculation.
lib_summary(LibPath, License)
# With sizes; the "Calculating sizes..." message is emitted first.
lib_summary(LibPath, License, sizes = TRUE)
# Non-logical values for `sizes`: each is expected to trigger the cli error,
# showing how the message renders for a number, a string and NULL.
lib_summary(LibPath, License, sizes = 10)
lib_summary(LibPath, License, sizes = "hello")
lib_summary(LibPath, License, sizes = NULL)

update tests

  • snapshot for error
  • groups