1

I am trying to create a function with one of column of my dataframe outplacement, because similar calculations I would need for other columns also.

numeric_fun<-function(dataset,grp_var,var){

  require("dplyr")

    dataset%>%select(grp_var,var)%>% group_by_(grp_var)%>%
  summarize(
    'q25' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
    'median' =round(quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
    'avg' = round( mean(var, na.rm=TRUE), 0),
    'q75' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
    'n' = sum(!is.na(var))
  )%>% 
  mutate(
    q25 = ifelse( n < 5, "--", paste(q25,"%" )),
    median = ifelse( n < 5, "--", paste(median,"%")),
    avg = ifelse(n < 5, "--", paste(avg,"%")),
    q75 = ifelse( n < 5, "--", paste(q75,"%")),
    n = n
  ) %>% 
  rename( "Industry"=grp_var,
          "25%" = q25,
          "75%" = q75
  )

}

numeric_fun(outplacement,"Q7_1","Q8")

But this ones is not working, any other efficient ways?

output of dput

structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA, 
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L, 
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L, 
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L, 
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA, 
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L, 
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services", 
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance", 
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing", 
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA, 
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L, 
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L, 
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L, 
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA, 
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L, 
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L, 
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L, 
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L, 
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L, 
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA, 
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L, 
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA, 
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L, 
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA, 
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L, 
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L, 
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA, 
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA, 
-94L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 
0x0000000000090788>, .Names = c("Q7_1", 
"Q8", "Q9", "Q10"))
3
  • Right now you are feeding a character string (the content of var, which is "Q7_1") to functions like mean, which expect a numeric vector. dplyr can work with these character strings for select and similar functions, but quantile and mean can't. Commented Feb 6, 2018 at 12:49
  • Try with enquo/quo_name and !! Commented Feb 6, 2018 at 12:58
  • I am using two columns here, one for grouping 'grp_var' and other numeric column 'var'. Q7_1 is the variable I need for grouping the output. Mean and all other calculations I am performing on other numeric columns. Please let me know if there is any other way to do this Commented Feb 6, 2018 at 13:03

2 Answers 2

2

Using the enquo/!! from dplyr we can remodel the function as

library(dplyr)
numeric_fun<-function(dataset,grp_var,var){

      grp_var <- enquo(grp_var)
      var <- enquo(var)


    dataset %>%
          select(!! grp_var, !!var) %>%
          group_by(!! grp_var) %>%
          summarise(
               q25 = quantile(!! var,  type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
           median =round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
               avg = round( mean(!! var, na.rm=TRUE), 0),
               q75 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
               n = sum(!is.na(!!var))) %>%

         mutate(
              q25 = ifelse( n < 5, "--", paste0(q25,"%" )),
              median = ifelse( n < 5, "--", paste0(median,"%")),
              avg = ifelse(n < 5, "--", paste0(avg,"%")),
              q75 = ifelse( n < 5, "--", paste0(q75,"%"))

          ) %>% 
        rename(Industry= !!grp_var,
          `25%` = q25,
          `75%` = q75
  )



}

-run the function

numeric_fun(df1, Q7_1, Q8)
# A tibble: 14 x 6
#   Industry                   `25%` median avg   `75%`     n
#   <fctr>                     <chr> <chr>  <chr> <chr> <int>
# 1 Banking/Financial Services 2%    3%     4%    5%       11
# 2 Chemicals                  --    --     --    --        3
# 3 Consumer Goods             1%    3%     3%    5%        5
# 4 Energy                     --    --     --    --        4
# 5 High Tech                  1%    4%     3%    4.5%      6
# 6 Insurance/Reinsurance      --    --     --    --        3
# 7 Logistics                  --    --     --    --        2
# 8 Mining & Metals            --    --     --    --        4
# 9 Other Manufacturing        --    --     --    --        4
#10 Other Non-Manufacturing    2%    2%     3%    4.75%     8
#11 Retail & Wholesale         --    --     --    --        2
#12 Services (Non-Financial)   2%    3%     3%    5%       12
#13 Transportation Equipment   --    --     --    --        3
#14 <NA>                       --    --     --    --        0
Sign up to request clarification or add additional context in comments.

3 Comments

It also needs rlang and data.table.
@G.Grothendieck The functions enquo are loaded without explicitly calling rlang
Just checked the NAMESPACE file of dplyr on github and you are correct. rlang must be installed but it does not have to be loaded. data.table must still be loaded since the input is data.table.
2

The problem is that the code was using character strings in inappropriate contexts. We can use sym and !! from the rlang package to translate them. Add the statements marked with ##and then use !!grp_var and !!var in place of grp_var and var everywhere. Also changed group_by_ to group_by, reformatted, changed the require to library (see next paragraph) and added library statements for data.table and rlang.

Note that library is preferable to require unless within an if. That way if the package is missing it will fail at the library statement making the cause obvious. On the other hand with require it will fail further down making it harder to debug.

library(data.table) ##
library(dplyr)


numeric_fun <- function(dataset, grp_var, var) {

  grp_var <- sym(grp_var)
  var <- sym(var)

  dataset %>%
    select(!!grp_var,!!var) %>% 
    group_by(!!grp_var) %>%
    summarize(
      'q25' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[2],
      'median' = round(quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
      'avg' = round( mean(!!var, na.rm = TRUE), 0),
      'q75' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[4] ,
      'n' = sum(!is.na(!!var))
    ) %>% 
    mutate(
      q25 = ifelse( n < 5, "--", paste(q25, "%" )),
      median = ifelse( n < 5, "--", paste(median, "%")),
      avg = ifelse(n < 5, "--", paste(avg, "%")),
      q75 = ifelse( n < 5, "--", paste(q75, "%")),
      n = n
    ) %>% 
    rename( "Industry" = !!grp_var,
            "25%" = q25,
            "75%" = q75
    )

}

numeric_fun(outplacement,"Q7_1","Q8")

giving:

# A tibble: 14 x 6
   Industry                   `25%` median avg   `75%`      n
   <fctr>                     <chr> <chr>  <chr> <chr>  <int>
 1 Banking/Financial Services 2 %   3 %    4 %   5 %       11
 2 Chemicals                  --    --     --    --         3
 3 Consumer Goods             1 %   3 %    3 %   5 %        5
 4 Energy                     --    --     --    --         4
 5 High Tech                  1 %   4 %    3 %   4.5 %      6
 6 Insurance/Reinsurance      --    --     --    --         3
 7 Logistics                  --    --     --    --         2
 8 Mining & Metals            --    --     --    --         4
 9 Other Manufacturing        --    --     --    --         4
10 Other Non-Manufacturing    2 %   2 %    3 %   4.75 %     8
11 Retail & Wholesale         --    --     --    --         2
12 Services (Non-Financial)   2 %   3 %    3 %   5 %       12
13 Transportation Equipment   --    --     --    --         3
14 <NA>                       --    --     --    --         0

Note

dput does not work with objects having external pointers such as data.table objects so we used this:

outplacement <- 
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA, 
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L, 
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L, 
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L, 
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA, 
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L, 
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services", 
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance", 
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing", 
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA, 
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L, 
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L, 
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L, 
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA, 
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L, 
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L, 
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L, 
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L, 
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L, 
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA, 
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L, 
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA, 
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L, 
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA, 
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L, 
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L, 
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA, 
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA, 
-94L), class = "data.frame", .Names = c("Q7_1", "Q8", "Q9", "Q10"))

library(data.table)
outplacement <- as.data.table(outplacement)

4 Comments

I think there is a typo in your function i.e. before the rename the #%>% is commented
I am trying create function for categorical variables using table() function. and in this case how do I pass the variables. enquo and sym would not work in this case. Data is same - outplacement. T1.1<-with(data,table(!!grp_var,!!var))
tidyeval/rlang constructs can mostly only be used with packages that support them such as dplyr; otherwise, as a rule you should not use non-standard evaluation. Rather than pass Type in the following example, pass "Type", i.e. a character string: mytab <- function(data, var) table(data[[var]]); mytab(CO2, "Type")
Actually yes its working after I added "Type" for two variables also, Thanks

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.