2

I have the code given below:

# Run disAccRegFunc on every SIC x FYEAR piece of dt and recombine the results.
dt <- ddply(dt, .(SIC, FYEAR), disAccRegFunc)

disAccRegFunc <- function(dt) {
  # Compute discretionary accruals: regress ACNew on DSALENew, PPEGTNew and
  # ROANew using the rows of `dt`.
  fit <- lm(ACNew ~ DSALENew + PPEGTNew + ROANew, data = dt)

  # Attach the raw and the studentized residuals as two new columns and
  # hand back the augmented table.
  dt[["RES"]] <- residuals(fit)
  dt[["StudRES"]] <- studres(fit)
  dt
}

In this code I apply the function disAccRegFunc to each fragment using ddply. I have also written a version using data.table, as given below:

        # Call disAccRegFunc on each group's .SD; groups are defined by `.by`.
        dt[, disAccRegFunc(.SD), by = .by]

But I suspect this would be slower, as it has to load .SD into memory for each fragment. Is there any way to make this code more efficient? Thanks.

Here is a snapshot of the data:

structure(list(SIC = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1), FYEAR = c(1989, 1989, 1989, 1989, 1989, 1989, 1989, 
1989, 1989, 1989, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 
1991, 1991, 1991, 1991, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 
1992, 1992, 1992, 1992, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 
1993, 1993, 1993, 1993, 1993, 1994, 1994, 1994, 1994, 1994, 1994, 
1994, 1994, 1994, 1994, 1994, 1994, 1994, 1995, 1995, 1995, 1995, 
1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1996, 1996, 
1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 
1997, 1997, 1997, 1997, 1997), DSALENew = c(0.012602500023269, 
0.291902040273809, 0.118617033965829, 0.0893175203093097, -0.00852049231260627, 
0.0088329859025545, 0.209634378324404, 0.0830958123218592, 0.0738020724667918, 
0.109482024510348, -0.0428304666755963, -0.16588866439072, 0.121627138869356, 
0.0312269226711679, 0.101225809778869, -0.0275779376498801, 0.237572262729396, 
-0.0121992630135952, -0.00510842445824787, 0.0576157552901739, 
0.0855443732845379, 0.0872632057071098, -0.156267221848019, 0.0815859699707067, 
-0.0430624961441175, 0.153418299584922, -8.85024282853663e-05, 
0.133435797726111, -0.0184609333710255, -0.146181230961207, 0.0781112477932131, 
0.0442291827447641, 0.00716417910447766, 0.0481930614039844, 
0.0541753077810537, 0.0665705612789049, 0.118963433232041, 0.0294059514659054, 
-0.632275504735391, -0.0502141257669623, 0.0958285084007296, 
-0.0272426945849758, 0.085945755547728, 0.301778708148965, -0.0459045802393442, 
0.0169764469498758, -0.0562287270251872, 0.0669855988183644, 
0.0547472043521437, -0.067714725413364, 0.0617426162281712, 0.440429133206918, 
0.0833729932633978, -0.0280735721200387, 0.0383118213480845, 
-0.0194657903500448, 0.0626774121566572, 0.113601675703828, 0.30761369443025, 
0.109305701022796, -0.030075679207274, 0.506415816050758, 0.125916995075369, 
-0.0196319087485011, 0.0578873173006881, 0.0897072710103872, 
-0.719538572458544, -0.478305381558564, 0.173499612656267, -0.0250104170612523, 
-0.0119919744060999, -0.139720136759367, 0.0418697622544592, 
0.0593189307945807, -0.505190248796772, 0.211265167747981, -0.0227537539511344, 
-0.00186915887850474, -1.10693374422188, 0.0756610100079348, 
0.0921928012282265, 0.159792224191246, 0.035416442965031, -0.153989830860226, 
0.0147928615931956, 0.139226519337017, -0.0114289417966556, -0.0513681750613451, 
0.307342194442119, 0.218928016906197, 0.198455485939094, 0.00664589823468326, 
0.0398349694124342, 0.115581717451524, 0.584157679434734, 0.00032726458480456, 
0.039702980735921, 0.200301694587959, 0.0416528925619835, 0.36358070267058
), PPEGTNew = c(0.953973882854457, 0.467086462417758, 0.638359582619649, 
0.265758721056519, 0.689282635504338, 0.512784274929631, 0.500905533546401, 
0.302835073280151, 0.392572501564782, 0.173497722981228, 0.969752361742529, 
0.24137810910611, 0.684512774616975, 0.339065302247072, 0.70778363944283, 
0.509976924121081, 0.570903219759228, 0.30435274734949, 0.355635184458544, 
0.129286263007193, 0.895242451967063, 0.949408181259518, 0.336231706570326, 
0.744086161679677, 0.305410574372262, 0.680226270692954, 0.513867224231965, 
0.556038600187438, 0.256535686631187, 0.230172129041729, 0.205983930988692, 
0.918203511012942, 0.82228855721393, 0.940475545033404, 0.784086638101383, 
0.285761166391243, 0.66772390313165, 0.533457779729878, 0.584734315365566, 
0.261132460991096, 0.322836113207432, 0.978359054565186, 0.794293765410356, 
0.712110006643519, 0.914969230419874, 0.784890348299594, 0.540912185159973, 
0.648134411028597, 0.551530590216312, 0.67236550759716, 0.30596000552919, 
0.299670387008921, 1.10469551228341, 0.748951274604711, 0.447654169227617, 
0.541347525306156, 0.887468150139335, 0.791733854056621, 0.647061829330608, 
0.680562828917594, 0.598813088691675, 0.736365482650909, 0.388248067037364, 
0.348369262297389, 1.16189687609724, 0.77435945860875, 0.534607065609229, 
0.690092274533576, 0.254856986617654, 0.868432516383196, 0.499511832537101, 
0.631419407141095, 0.71472891353351, 0.660164441929363, 0.612835925592257, 
0.454140131190202, 0.396457871496262, 0.747663551401869, 0.663174114021572, 
0.760667025007039, 0.269186299967778, 0.73279715312704, 0.868230788503526, 
0.409463525993566, 0.663444240909767, 0.725598526703499, 0.691893877198289, 
0.674017924092229, 0.598189952656007, 0.373202231080638, 0.775171207926563, 
1.22658359293873, 0.805768957177408, 0.220152354570637, 0.591156955236735, 
0.832888368327678, 0.431280406066905, 0.670032249971822, 0.687603305785124, 
0.705083646971962), ACNew = c(0.0395945606344065, 0.0664252010367515, 
-0.0301384111110581, 0.00254137886094096, -0.0265658063511183, 
0.0166295681258759, 0.017084585460487, -0.2362842156747, 0.0046734821614855, 
0.226591136287904, 0.0173969224465998, 0.17431895770919, 0.0477768543970679, 
0.0700759573794704, -0.00381011122461684, 0.0327360752907108, 
0.0270528951957744, 0.0692339617421051, 0.145256938943222, -0.012902437321042, 
0.0731930466605672, 0.0408395008950757, 0.220439654644541, -0.0044062389767342, 
0.275945462397434, 0.0790790446221029, -0.0311086035423097, -0.0284790074946835, 
0.0561202541758336, 0.139409843285499, 0.0526540633186986, 0.0137318040290603, 
-0.00597014925373134, 0.0544822559172043, -0.00638549410303916, 
0.355472733026265, 0.0192527105080905, 0.0449544306577358, 0.06393425639316, 
0.000712762608473587, 0.128074844252703, 0.0703969102602978, 
0.0250088059175766, 0.0264988655878261, 0.0102073256579694, 0.0162804709314325, 
0.848230088495575, 0.0312279981947237, 0.00828979750442554, 0.0349715025339877, 
0.100239598212229, 0.187866678544612, 0.0916311961658344, -0.0464666021297193, 
0.322474902683876, 0.1259809866102, -0.0111675601060711, 0.000467070106532058, 
0.0368932038834952, 0.0957255259448906, 0.00633745304077966, 
0.0479452127623754, 0.134842513781169, -0.0118286894983082, 0.0424276408533741, 
0.0698772426817753, 0.341384282624369, 0.0355627399474641, 0.216942870596813, 
-0.0462706920716694, 0.036495723805627, 0.147967995488351, -0.0337729047054296, 
-0.0368829266668886, 0.0225649771950469, 0.0382707722793551, 
0.0477004248403114, -0.00872274143302181, 0.551001540832049, 
0.0122219662648135, 0.156750507022499, 0.0566504760333187, -0.00385661619957765, 
0.0477623445351992, 0.00240292197259525, 0.122283609576427, 0.0188393898803094, 
0.0593513325987576, 0.565895582777576, 0.285157733474218, 0.155034241585313, 
-0.149117341640706, -0.017095841039503, -0.126246537396122, -0.2505725078781, 
-0.0850198942418657, -0.00165680116879791, 0.0528700778192669, 
-0.044297520661157, -0.161513388917902), ROANew = c(0.163292659090274, 
-0.226041735894198, 0.0452896804759701, -0.064034058820974, 0.0921216374505778, 
0.0575910680846553, 0.0444595485158336, -0.114887086165315, 0.0114889769803185, 
-0.0696064274871339, 0.0740157108076805, -0.186354769037832, 
0.0513349954186111, -0.235757991349298, 0.0756937929151935, 0.0442740147504638, 
0.0495950382782889, -0.0935696049702564, -0.0920108683581228, 
-0.0784272644737761, 0.070448307410796, 0.0760680791941646, -0.470699895903496, 
0.0512729475637624, -0.368708742056882, 0.0996120773321018, 0.0328344008938745, 
0.0483339285972892, -0.176730866139797, -0.172129041729365, -0.0340218336790418, 
0.0206509300902148, 0.0624875621890547, 0.0590244519545298, 0.0221510307432041, 
-0.425699987107814, 0.132221178502936, 0.0329197320742286, -0.0736185481498404, 
-0.22114647865573, -0.185582948611794, 0.0310091128169936, 0.0352236703064459, 
0.0579615678704388, 0.0335740400118082, 0.0240287571367621, -1.15527569775357, 
0.11279120338079, 0.0641595786019602, -0.0181738389010379, -0.290547389761784, 
-0.220156025300024, 0.0394543687368033, 0.0100032268473701, -0.58840401557058, 
0.0152936519406099, 0.0802071603157223, 0.0187610177761631, -1.2452733776188, 
0.171864696070445, 0.0332552948222355, -0.0172610138205074, -0.513469255546958, 
0.0195868685219487, 0.0165811580607801, 0.0346238589864652, -0.301730353280461, 
0.0856334613053142, -0.533258396245044, 0.0846717678699951, 0.0380945477528242, 
-0.990166014592365, 0.1437311990952, 0.00963128169279762, 0.0101235097874349, 
-0.365303235587282, -0.0863005431502716, 0.029595015576324, -1.14576271186441, 
0.0828918068033479, -0.572186735912356, 0.144467323379801, 0.0397204624360213, 
-0.0796039075586653, 0.0361253877308557, 0.16427255985267, 0.0364040962710107, 
-0.0108940888920407, -0.601575652723907, -0.243145420678573, 
0.0495410170479382, -0.144963655244029, 0.0926992934035187, -0.0573407202216066, 
0.119598363703979, 0.096224400158465, -0.0436491798834214, 0.0647035511327484, 
0.160661157024793, 0.0546001332589004)), .Names = c("SIC", "FYEAR", 
"DSALENew", "PPEGTNew", "ACNew", "ROANew"), row.names = c(NA, 
100L), class = "data.frame")
2
  • 1
    Have you benchmarked this or is it just a hunch that this will be slow? Commented Mar 23, 2014 at 9:14
  • It takes me a couple of seconds to run on a table of 80k rows. I'm not sure whether a more efficient solution exists. Commented Mar 23, 2014 at 9:22

1 Answer 1

7

It would be better to rewrite your function and make use of :=:

library(MASS)
disAccRegFunc <- function(dt) {
  # Fit ACNew ~ DSALENew + PPEGTNew + ROANew on the rows of `dt`.
  fit <- lm(ACNew ~ DSALENew + PPEGTNew + ROANew, data = dt)

  # Return an unnamed two-element list: raw residuals first, studentized
  # residuals second, so the caller can assign them to two columns with `:=`.
  list(residuals(fit), studres(fit))
}

# Add both residual columns by reference, one regression per SIC x FYEAR group.
# .SDcols limits .SD to the four columns the model formula uses, so the other
# columns of DT are not materialised for every group.
DT[, c("Res", "StudRes") := disAccRegFunc(.SD),
   by = list(SIC, FYEAR),
   .SDcols = c("ACNew", "DSALENew", "PPEGTNew", "ROANew")]

This could be made faster by using lm.fit and calculating the Studentized residuals "manually".

PS: Why you'd think that ddply could be faster than data.table is beyond my understanding.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.