2
library(rlang)
library(dplyr)
library(lubridate)

example = tibble(
  date = today() + c(1:6),
  foo = rnorm(6), 
)

do.some.stuff <- function(data, foo.col){
  sum.col = parse_expr(paste(expr_text(enexpr(foo.col)), "sum", sep="."))
  max.col = parse_expr(paste(expr_text(enexpr(foo.col)), "max", sep="."))
  cnt.col = parse_expr(paste(expr_text(enexpr(foo.col)), "cnt", sep="."))
  
  select(data, date, {{ foo.col }}) %>% 
    filter(!is.na(date) & !is.na({{ foo.col }})) %>% mutate(
      "{{ foo.col }}.cnt" := cumsum( !is.na({{ foo.col }}) ),
      "{{ foo.col }}.sum" := cumsum({{ foo.col }}),
      "{{ foo.col }}.max" := cummax( {{ sum.col }} ),
      "{{ foo.col }}.mu" :=  {{ sum.col }} / {{ cnt.col }}
    )
}

do.some.stuff(example, foo)

So the above code works just fine, but it is kind of ugly, particularly the three parse_expr lines. i could rewrite the function as:

do.some.stuff <- function(data, foo.col){
  sum.col = paste(expr_text(enexpr(foo.col)), "sum", sep=".")
  max.col = paste(expr_text(enexpr(foo.col)), "max", sep=".")
  cnt.col = paste(expr_text(enexpr(foo.col)), "cnt", sep=".")
  
  select(data, date, {{ foo.col }}) %>% 
    filter(!is.na(date) & !is.na({{ foo.col }})) %>% mutate(
      cnt.col := cumsum( !is.na({{ foo.col }}) ),
      sum.col := cumsum({{ foo.col }}),
      max.col := cummax( {{ parse_expr(sum.col) }} ),
      "{{ foo.col }}.mu" :=  {{ parse_expr(sum.col) }} / {{ parse_expr(cnt.col) }}
    )
}

But it's not a lot better. Is there any other ways to do accomplish this same behavior (I don't want to change the shape of the df, that part is not up to me) but kick the rlang dependency? This works just fine for now but I would like something cleaner / easier to read if it is possible. If it wasn't obvious, I am newish to metaprogramming in R although I do have experience in other languages.

2 Answers 2

3

Use across with the .names argument or if foo_cnt, etc. with an underscore is ok then just omit the .names argument since that is the default.

library(dplyr)
library(tibble)

do.some.stuff.2 <- function(data, col) {
  cnt <- function(x) cumsum(!is.na(x))
  mx <- function(x) cummax(cumsum(x))      
  mu <- function(x) cumsum(x) / cnt(x)
  data %>%
    select(date, {{col}}) %>%
    filter(!is.na(date) & !is.na({{col}})) %>%
    mutate(across({{col}}, lst(cnt, sum=cumsum, max=mx, mu), .names = "{.col}.{.fn}" ))
}
# test
do.some.stuff.2(example, foo)

giving:

# A tibble: 6 x 6
  date             foo foo.cnt   foo.sum   foo.max    foo.mu
  <date>         <dbl>   <int>     <dbl>     <dbl>     <dbl>
1 2021-02-11 -0.000202       1 -0.000202 -0.000202 -0.000202
2 2021-02-12  0.363          2  0.363     0.363     0.181   
3 2021-02-13  1.27           3  1.63      1.63      0.543   
4 2021-02-14  1.50           4  3.13      3.13      0.781   
5 2021-02-15  1.00           5  4.13      4.13      0.826   
6 2021-02-16 -0.458          6  3.67      4.13      0.612 
Sign up to request clarification or add additional context in comments.

4 Comments

This is beautiful thought the foo.max is calculated by applying cummax on foo.sum not foo
Why do you need to write {{col}} with two curly braces?
@skan, The curly-curly operator is rlang syntax for referring to arguments passed using nonstandard evaluation. See help("{{") .
@Sinh Nguyen, Although it equals that that is not what it is dong. It applies mx to foo and where mx is mx <- function(x) cummax(cumsum(x))
1

This could be a simpler version of

library(rlang)
library(dplyr)
library(lubridate)

example = tibble(
  date = today() + c(1:6),
  foo = rnorm(6), 
)

# This is your initial version of the code.
do.some.stuff <- function(data, foo.col){
  sum.col = parse_expr(paste(expr_text(enexpr(foo.col)), "sum", sep="."))
  max.col = parse_expr(paste(expr_text(enexpr(foo.col)), "max", sep="."))
  cnt.col = parse_expr(paste(expr_text(enexpr(foo.col)), "cnt", sep="."))
  
  select(data, date, {{ foo.col }}) %>% 
    filter(!is.na(date) & !is.na({{ foo.col }})) %>% mutate(
      "{{ foo.col }}.cnt" := cumsum( !is.na({{ foo.col }}) ),
      "{{ foo.col }}.sum" := cumsum({{ foo.col }}),
      "{{ foo.col }}.max" := cummax( {{ sum.col }} ),
      "{{ foo.col }}.mu" :=  {{ sum.col }} / {{ cnt.col }}
    )
}

# Here is my version where foo.col is a character param
do.some.stuff_2 <- function(data, foo.col) {
  data %>% select(date, !!foo.col) %>% 
    filter(!is.na(date) & !is.na(!!foo.col)) %>%
    mutate(
      # Here as foo.col is a character to add new column just combine them together
      !!paste0(foo.col, ".cnt") := cumsum(!is.na(.data[[foo.col]])),
      !!paste0(foo.col, ".sum") := cumsum(.data[[foo.col]]),
      !!paste0(foo.col, ".max") := cummax(.data[[paste0(foo.col, ".sum")]]),
      !!paste0(foo.col, ".mu") :=  .data[[paste0(foo.col, ".sum")]] / 
                                   .data[[paste0(foo.col, ".cnt")]]
    )
}

identical(do.some.stuff(example, foo), do.some.stuff_2(example, "foo"))

You can learn more here: https://dplyr.tidyverse.org/articles/programming.html

2 Comments

Is accessing .data directly not frowned upon? I am unfamiliar with R's specific conventions or etiquette but in many other languages accessing a dot or underscore variable directly would be frowned upon as an encapsulation violation. Is this type of behavior normal with dplyr?
You can check more document here. .data is an internal reference of dplyr dplyr.tidyverse.org/articles/programming.html

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.