3

I wrote a small function to aggregate several columns by a discrete variable:

library(data.table)

# Sum the `weight` and `displacement` columns of `df` within each level of `x`,
# optionally also grouped by `by`. All column arguments are bare (unquoted)
# names; an argument left at its NULL default is skipped.
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
  # An omitted argument deparses to the string "NULL"; this strips those out.
  drop_null <- function(nms) nms[nms != "NULL"]

  x_name <- deparse(substitute(x))
  by_name <- deparse(substitute(by)) # Does not work with multiple variables!
  weight_name <- deparse(substitute(weight))
  displacement_name <- deparse(substitute(displacement))

  group_cols <- drop_null(c(x_name, by_name))
  sum_cols <- drop_null(c(weight_name, displacement_name))

  dt <- data.table::data.table(df)
  dt[, lapply(.SD, sum, na.rm = TRUE), by = group_cols, .SDcols = sum_cols][]
}

The sums of the variables wt and disp are returned (grouped by cyl, and am):

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = am)

#>    cyl am     wt   disp
#> 1:   6  1  8.265  465.0
#> 2:   4  1 16.338  748.9
#> 3:   6  0 13.555  818.2
#> 4:   8  0 49.249 4291.4
#> 5:   4  0  8.805  407.6
#> 6:   8  1  6.740  652.0

The following also returns the correct result:


onewayfn(mtcars, cyl, weight = wt, displacement = disp)
#>    cyl     wt   disp
#> 1:   6 21.820 1283.2
#> 2:   4 25.143 1156.5
#> 3:   8 55.989 4943.4

However, the function returns an error if I add multiple variables to by:

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = list(am,vs))

I would like to obtain the same result as above but now grouped by cyl, am, and vs. How can I rewrite onewayfn() to do this?

2 Answers 2

1

We can pass `by` as a character vector of column names:

# Sum the `weight` and `displacement` columns of `df` within each level of `x`
# and of the columns named in `by`. `x`, `weight` and `displacement` are bare
# names; `by` is a character vector of column names (or NULL for none).
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
  # An omitted argument deparses to the string "NULL"; this strips those out.
  drop_null <- function(nms) nms[nms != "NULL"]

  x_name <- deparse(substitute(x))
  weight_name <- deparse(substitute(weight))
  displacement_name <- deparse(substitute(displacement))

  # `by` is used as given rather than deparsed, so it can hold several names.
  group_cols <- drop_null(c(x_name, by))
  sum_cols <- drop_null(c(weight_name, displacement_name))

  dt <- data.table::data.table(df)
  dt[, lapply(.SD, sum, na.rm = TRUE), by = group_cols, .SDcols = sum_cols][]
}

-testing

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = c("am","vs"))

#   cyl am vs     wt   disp
#1:   6  1  0  8.265  465.0
#2:   4  1  1 14.198  628.6
#3:   6  0  1 13.555  818.2
#4:   8  0  0 49.249 4291.4
#5:   4  0  1  8.805  407.6
#6:   4  1  0  2.140  120.3
#7:   8  1  0  6.740  652.0

Another option is to build the call as a string and evaluate it:

# Sum the `weight` and `displacement` columns of `df` within each level of `x`
# (plus any `by` columns) by building the data.table call as a string and
# evaluating it.
#
#   df            data frame; the expression passed here is spliced into the
#                 generated code
#   x             bare name of the main grouping column
#   weight,
#   displacement  bare names of the columns to sum (either may be omitted)
#   by            optional extra grouping columns as bare names, e.g. `am`,
#                 `list(am, vs)`, `.(am, vs)` or `c(am, vs)`
#
# Prints the generated call, then returns the aggregated data.table.
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){

  # deparse() may split a long expression over several strings; rejoin them.
  dfname <- paste(deparse(substitute(df)), collapse = " ")
  .x <- deparse(substitute(x))
  .weight <- deparse(substitute(weight))
  .displacement <- deparse(substitute(displacement))

  # Read the grouping columns off the parsed expression instead of editing its
  # text with regular expressions: this keeps names such as `Sepal.Length`
  # intact and ignores the wrapper call (list, ., c).
  by_expr <- substitute(by)
  .by <- all.vars(by_expr)
  if (!is.null(by_expr) && length(.by) == 0L) {
    stop("`by` must be given as unquoted column names, e.g. by = list(am, vs)",
         call. = FALSE)
  }

  cols <- c(.weight, .displacement)
  cols <- cols[cols != "NULL"]
  cols <- paste0("c(", paste(dQuote(cols, FALSE), collapse = ","), ")")

  .xby <- c(.x, .by)
  .xby <- .xby[.xby != "NULL"]
  .xby1 <- paste0("c(", toString(sQuote(.xby, FALSE)), ")")

  str1 <- glue::glue('data.table::data.table({dfname})[, lapply(.SD, sum, na.rm = TRUE), by = {.xby1}, .SDcols = {cols}][]')
  print(str1)
  # Evaluate where the caller wrote `df`: inside this function the same text
  # could resolve to one of the parameters (x, by, ...) or not be found at all.
  eval(parse(text = str1), envir = parent.frame())
}

-testing

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = list(am, vs))
#data.table::data.table(mtcars)[, lapply(.SD, sum, na.rm = TRUE), by = c('cyl', 'am', 'vs'), .SDcols = c("wt","disp")][]
#   cyl am vs     wt   disp
#1:   6  1  0  8.265  465.0
#2:   4  1  1 14.198  628.6
#3:   6  0  1 13.555  818.2
#4:   8  0  0 49.249 4291.4
#5:   4  0  1  8.805  407.6
#6:   4  1  0  2.140  120.3
#7:   8  1  0  6.740  652.0
 
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = am)
#data.table::data.table(mtcars)[, lapply(.SD, sum, na.rm = TRUE), by = c('cyl', 'am'), .SDcols = c("wt","disp")][]
#   cyl am     wt   disp
#1:   6  1  8.265  465.0
#2:   4  1 16.338  748.9
#3:   6  0 13.555  818.2
#4:   8  0 49.249 4291.4
#5:   4  0  8.805  407.6
#6:   8  1  6.740  652.0
Sign up to request clarification or add additional context in comments.

3 Comments

Thanks @akrun, very much appreciated. Is there a way such that I can also use: by = am, or by = list(am, vs)?
@mharinga it could be used, but it is not a direct way.
@mharinga would the updated one help you?
1

Here is another option:

# Sum the `weight` and `displacement` columns of `df` within each level of `x`
# and of the columns given in `by`. Column arguments are bare names; `by` may
# be a single name or several wrapped in list(...) or .(...).
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL) {
    # Work on the unevaluated call so that column names can be passed bare.
    call_args <- as.list(match.call())

    # Break `by` into its parts: a lone symbol becomes a one-element list, a
    # call becomes its function name followed by its arguments.
    group_parts <- as.list(call_args$by)
    if (length(group_parts) > 0L) {
        wrapper <- group_parts[[1L]]
        # Drop the list / . wrapper so that only the column symbols remain.
        if (wrapper == as.symbol("list") || wrapper == as.symbol("."))
            group_parts <- group_parts[-1L]
    }

    # Names of the columns to sum; an argument that was not supplied adds
    # nothing.
    sum_cols <- c(
        if (!missing(weight)) deparse(call_args$weight),
        if (!missing(displacement)) deparse(call_args$displacement)
    )

    # Assemble list(x, <by columns>) as a call and splice it, together with the
    # column names, into the data.table expression before evaluating it.
    group_call <- as.call(c(list(as.symbol("list"), substitute(x)), group_parts))
    eval(bquote(data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by=.(group_call), .SDcols=.(sum_cols)]))
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.