1

I have been trying to generalize a function that performs several different aggregations, each grouped by the same key columns, and then merges the results together. First, an example data.table:

# Example data (needs the data.table package attached): two key columns,
# a 0/1 filter flag and two numeric variables, eight rows in total.
DT <- data.table(
  Key1   = rep(c("giallo", "verde"), times = c(3, 5)),
  Key2   = c("M", "M", "L", "S", "M", "M", "M", "L"),
  Filtro = c(1, 1, 1, 0, 0, 0, 1, 1),
  Var1   = c(1, 4, 5, 3, 7, 5, 8, 1),
  Var2   = c(11, 24, 15, 33, 17, 45, 38, 21)
)

With the help of a previous question, I wrote the following code:

# Summarise `s` by the column named in `chiave1` under three row filters and
# join the three summaries into a single table.
#
# s       : data.table holding Filtro, Var1, Var2 and the grouping column
# chiave1 : character string naming the grouping column
# chiave2 : accepted for interface reasons but not used by this version
#
# Each intermediate summary is printed before the final merge. The result has
# one row per group; groups missing from a filtered summary get NA in that
# summary's columns (all = TRUE in the merge).
Tavola <- function(s, chiave1, chiave2 = NULL) {
  # Sums and means over every row.
  all_rows <- s[,
    list(
      Somma11 = sum(Var1),
      Somma12 = sum(Var2),
      Media11 = mean(Var1),
      Media12 = mean(Var2)
    ),
    by = eval(chiave1)
  ]
  print(all_rows)

  # Same statistics restricted to rows flagged with Filtro == 1.
  flagged <- s[Filtro == 1,
    list(
      Somma21 = sum(Var1),
      Somma22 = sum(Var2),
      Media21 = mean(Var1),
      Media22 = mean(Var2)
    ),
    by = eval(chiave1)
  ]
  print(flagged)

  # Var2 statistics for flagged rows whose Var1 exceeds 3.
  flagged_high <- s[Filtro == 1 & Var1 > 3,
    list(
      Somma32 = sum(Var2),
      Media32 = mean(Var2)
    ),
    by = eval(chiave1)
  ]
  print(flagged_high)

  # Fold the three summaries together, left to right, on the grouping column.
  Reduce(
    function(x, y) merge(x, y, by = chiave1, all = TRUE),
    list(all_rows, flagged, flagged_high)
  )
}
TavolaStat <- Tavola(s = DT, chiave1 = "Key1")
> TavolaStat
     Key1 Somma11 Somma12  Media11  Media12 Somma21 Somma22  Media21  Media22 Somma32 Media32
1: giallo      10      50 3.333333 16.66667      10      50 3.333333 16.66667      39    19.5
2:  verde      24     154 4.800000 30.80000       9      59 4.500000 29.50000      38    38.0

This works, but when I try to generalize it so that it summarizes by two key variables, it fails:

    # Attempted two-key version of Tavola: builds three grouped summaries of `s`
    # (all rows; rows with Filtro==1; rows with Filtro==1 & Var1>3) and merges
    # them on the key columns.
    #
    # s       : data.table with columns Filtro, Var1, Var2 and the key columns
    # chiave1 : name of the first grouping column, passed as a character string
    # chiave2 : name of the second grouping column (character string), or NULL
    #
    # NOTE: as written this version fails at the first aggregation, as the two
    # calls and error messages shown after the function demonstrate:
    #   - with chiave2 = NULL, the second item of the `by` list is NULL and
    #     data.table reports "column or expression 2 of 'by' ... is type NULL";
    #   - with both keys given, each item of the `by` list is a length-1
    #     character string rather than a column of the table, and data.table
    #     reports that the items are length (1,1) instead of the row count.
    Tavola <- function(s,chiave1,chiave2=NULL)
    {
    
       # Summary 1: sums and means of Var1/Var2 over all rows.
       # The by=list(eval(...), eval(...)) form is what triggers the errors.
       Tavola1 <- s[,
                    by=list(eval((chiave1)),eval(chiave2)),
                    list(
                         Somma11=sum(Var1),
                         Somma12=sum(Var2),
                         Media11=mean(Var1),
                         Media12=mean(Var2)
                        )
                    ]
       print(Tavola1)
       # Summary 2: same statistics, restricted to rows where Filtro==1.
       Tavola2 <- s[Filtro==1,
                    by=list(eval(chiave1),eval(chiave2)),
                    list(
                          Somma21=sum(Var1),
                          Somma22=sum(Var2),
                          Media21=mean(Var1),
                          Media22=mean(Var2)
                        )
                   ]
       print(Tavola2)
       # Summary 3: sum and mean of Var2 for rows with Filtro==1 and Var1>3.
       Tavola3 <- s[Filtro==1 & Var1>3,
                    by=list(eval(chiave1),eval(chiave2)),
                    list(
                          Somma32=sum(Var2),
                          Media32=mean(Var2)
                        )
                   ]
    
       print(Tavola3)
       # Merge on the key names; c() drops chiave2 when it is NULL, and
       # all=TRUE keeps groups that are missing from a filtered summary.
       mymerge = function(x,y) merge(x,y,by=c(chiave1,chiave2),all=TRUE)
       # Fold the three summaries together, left to right.
       TavolaFinale <- Reduce(mymerge,list(Tavola1,Tavola2,Tavola3))
    
       return(TavolaFinale)
    }
> TavolaStat <- Tavola(s=DT,chiave1="Key1")
 Show Traceback
 
 Rerun with Debug
 Error in `[.data.table`(s, , by = list(eval((chiave1)), eval(chiave2)),  : 
  column or expression 2 of 'by' or 'keyby' is type NULL. Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))] > 
> TavolaStat <- Tavola(s=DT,chiave1="Key1",chiave2="Key2")
 Show Traceback
 
 Rerun with Debug
 Error in `[.data.table`(s, , by = list(eval((chiave1)), eval(chiave2)),  : 
  The items in the 'by' or 'keyby' list are length (1,1). Each must be same length as rows in x or number of rows returned by i (8). 

How do I fix this? Thanks in advance

3
  • 1
    Thanks for the edit. Couldn't you use c(chiave1,chiave2) instead of this list and eval business? With c(), when one element is NULL, it is dropped from the vector. Commented Aug 20, 2015 at 13:57
  • @Frank: the first function has chiave1 and chiave2 as arguments but only chiave1 is used in the function body. The second function tries to use both Commented Aug 20, 2015 at 13:57
  • 1
    @Frank, thanks it works. I had always used list in the by for data.table Commented Aug 20, 2015 at 14:06

1 Answer 1

1

How about passing the keys as a single character vector instead of two separate arguments?

# Summarise `s` by the columns named in `chiaves` under three row filters and
# join the three summaries into a single table.
#
# s       : data.table holding Filtro, Var1, Var2 and the grouping columns
# chiaves : character vector with the names of the grouping columns
#
# Each intermediate summary is printed before the final merge. Groups that are
# absent from a filtered summary get NA in that summary's columns
# (all = TRUE in the merge).
Tavola <- function(s, chiaves) {
  # Sums and means over every row.
  all_rows <- s[,
    list(
      Somma11 = sum(Var1),
      Somma12 = sum(Var2),
      Media11 = mean(Var1),
      Media12 = mean(Var2)
    ),
    by = chiaves
  ]
  print(all_rows)

  # Same statistics restricted to rows flagged with Filtro == 1.
  flagged <- s[Filtro == 1,
    list(
      Somma21 = sum(Var1),
      Somma22 = sum(Var2),
      Media21 = mean(Var1),
      Media22 = mean(Var2)
    ),
    by = chiaves
  ]
  print(flagged)

  # Var2 statistics for flagged rows whose Var1 exceeds 3.
  flagged_high <- s[Filtro == 1 & Var1 > 3,
    list(
      Somma32 = sum(Var2),
      Media32 = mean(Var2)
    ),
    by = chiaves
  ]
  print(flagged_high)

  # Fold the three summaries together, left to right, on the grouping columns.
  Reduce(
    function(x, y) merge(x, y, by = chiaves, all = TRUE),
    list(all_rows, flagged, flagged_high)
  )
}
TavolaStat <- Tavola(s = DT, c("Key1", "Key2"))
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.