1

I'm still relatively new to the tidyverse and purrr in particular. I'm working with a large dataset that combines nested and non-nested variables, and I want to use a mix of both to run a custom function. I've created a dummy function to show you what I mean, and attached the dput of an example dataset below (my apologies for the length - the nested data makes it quite long).

I have created a dummy function as well, which I attach here.

# Scaled flux total for a single nested data set.
#
# data:        object whose `Mean_Flux` element (read via `$`) holds the fluxes
# Temperature: numeric factor, multiplied with P before being applied
# P:           numeric factor, multiplied with Temperature before being applied
#
# Returns sum(Mean_Flux * (Temperature * P)) multiplied by 1e-20.
dummy.fn <- function(data, Temperature, P) {
  library(tidyverse)
  scale_factor <- Temperature * P
  weighted_flux <- data$Mean_Flux * scale_factor
  sum(weighted_flux) * 1e-20
}

For each row, this function should take as input the nested variable (here called data) and the corresponding temperature and pressure columns of the dummy dataset, and then perform this simple algebra. My function call appears as follows:

# Attempt to add `example` by handing the columns straight to dummy.fn().
test <- dummy.df %>%
  mutate(
    example = dummy.fn(data = data, Temperature = Temperature, P = P)
  )

For reasons I'm not clear on, this operation fails, as the new variable example is 0 in every line. In contrast, if I loop through the data and enter the variables piecewise, I get

# Reference result: call dummy.fn() on one row of dummy.df at a time.
# The result vector is sized from dummy.df rather than a hard-coded 6, so the
# loop still covers every row if the number of rows changes.
realfake <- numeric(length = nrow(dummy.df))
for (ii in seq_along(realfake)) {
  # `[[ii]]` extracts the nested data frame itself; `[ii]` picks the matching
  # scalar from the ordinary columns.
  realfake[ii] <- dummy.fn(data = dummy.df$data[[ii]],
                           Temperature = dummy.df$Temperature[ii],
                           P = dummy.df$P[ii])
}

> print(realfake)
[1] 7.822469 7.623985 3.539980 5.221917 3.287594 3.233342

I assume that I should be using something like map here instead of my simple mutate, but I'm not sure how to deal with the combination of nested and non-nested values.

Data

structure(list(season = c(3, 3, 1, 3, 3, 1), Altitude = c("MT",  "MT", "UT", "MT", "LT", "UT"), UTC_Time = structure(c(1470534960,  1470535080, 1486178400, 1470535200, 1470535320, 1486178520), class = c("POSIXct",  "POSIXt"), tzone = ""), P = c(589.97, 694.47, 376.68,
793.04, 
865.04, 352.25), Temperature = c(275.813611111111, 283.139444444444, 
257.765, 289.519444444444, 292.139722222222, 253.445555555556 ), data = list(structure(list(Wavelength = c(297, 298, 299, 300,  301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313,  314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326,  327, 328, 329, 330, 331, 332, 333, 334, 335), Mean_Flux = c(346786701963.935, 
626061924535.969, 1126633726569.86, 1918338536593.65, 3218638104811.68, 
5449524002604.49, 9484898064254.88, 13820461435289.1, 18789920323010.3, 
23457046439017.9, 31321356743728.2, 39269949244536.7, 43130338001947.1,  52184025292495, 70006345247561.1, 80548638622151.5, 90086921275887.9, 
98153308839011.1, 103401175230574, 107092723165457, 126891646052471,  133856552389051, 141642783823426, 156803183198368, 163612559652001,  159862508775884, 157167983622353, 176116682634322, 199921821427841,  235385911421404, 249676715927087, 244363646529101, 263234566112574,  281333010457433, 265537025544415, 264427548933606, 261716337991127,  261346751545484, 270936371232163)), .Names = c("Wavelength",  "Mean_Flux"), row.names = c(NA, -39L), class = c("tbl_df", "tbl",  "data.frame")), structure(list(Wavelength = c(297, 298, 299,  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312,  313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325,  326, 327, 328, 329, 330, 331, 332, 333, 334, 335), Mean_Flux = c(272023148470.228, 
518233644515.949, 916010533767.96, 1565942149268.24, 2636296423894.42, 
4458908538328.54, 7732598175165.71, 11246928069805.3, 15264354612772.2, 
19019479942303.6, 25375401979585.9, 31737542925788.3, 34887526297163.1, 
42129119893975.9, 56514483527965.4, 64937287417956.8, 72609089666033.8, 
79076572477421.1, 83265097132731.9, 86245421746183.5, 102168967093555,  107683345120850, 114048659026611, 126228311354447, 131687088369977,  128667372125788, 126492872763247, 141800182905113, 161124838735026,  189719525471917, 201314850025731, 197022084150577, 212449864588280,  227084864091805, 214363865211836, 213506487998880, 211375083902905,  211159274561343, 218982868415835)), .Names = c("Wavelength",  "Mean_Flux"), row.names = c(NA, -39L), class = c("tbl_df", "tbl",  "data.frame")), structure(list(Wavelength = c(297, 298, 299,  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312,  313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325,  326, 327, 328, 329, 330, 331, 332, 333, 334, 335), Mean_Flux = c(410394304909.122, 
684669539810.892, 1223266694384.72, 1963557186902.24, 3173415679630.32, 
5225375319418.26, 8770197550604.42, 12564560749371.3, 16746223663975.8, 
20302741354880.3, 27273955562084.5, 32927242244373.8, 35498107396829.6, 
42892000512231.7, 56905187544496.6, 64359473555914.1, 71007069308199.5, 
76999513227547.8, 80793102650481, 83594711964789.9, 98448759760262.7,  102672040789990, 108260963181122, 120081240175307, 124208435953767,  120527913821372, 118657919972015, 132436584508003, 150262974200508,  176158060683751, 186058420013994, 181901206957521, 195971524806382,  208355140062145, 197630550381303, 196148678371193, 193073080285136,  192828255132408, 198899715679545)), .Names = c("Wavelength",  "Mean_Flux"), row.names = c(NA, -39L), class = c("tbl_df", "tbl",  "data.frame")), structure(list(Wavelength = c(297, 298, 299,  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312,  313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325,  326, 327, 328, 329, 330, 331, 332, 333, 334, 335), Mean_Flux = c(163980796868.381, 
319993777135.726, 565806448670.383, 952234326748.916, 1587959819258.22, 
2695812851544.62, 4651974671696.67, 6732838672280.01, 9108259191538.7, 
11334161396213.6, 15048347042419.1, 18807745701333.9, 20611119116971.8, 
24871101998997.2, 33301542059082.8, 38225550132546.5, 42687331288244.4, 
46437737119994.5, 48840611130603.3, 50597339661460.7, 59887648834929.7, 
63080591122608.5, 66751509164580.2, 73899774592187.1, 77054311628771.3, 
75283217576979.8, 74028011494075.6, 82984607314213.3, 94322313089456.9,  111107417274732, 117870238469697, 115397830776186, 124535437696499,  133132448331574, 125690170404882, 125251095649920, 124004362932209,  123933924746482, 128592585854440)), .Names = c("Wavelength",  "Mean_Flux"), row.names = c(NA, -39L), class = c("tbl_df", "tbl",  "data.frame")), structure(list(Wavelength = c(297, 298, 299,  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312,  313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335), Mean_Flux = c(100712940929.025, 
187783570779.657, 332125026989.09, 554468090233.077, 932816905860.644, 
1575341294540.1, 2706869628518.39, 3908049344198.36, 5283370151094.56, 
6563640428537.23, 8678472367958.32, 10859984329782.8, 11898351315247.2, 
14337012448330.9, 19158990854357.7, 21962913406403.8, 24514874805940.1, 
26658483263231.1, 28016129393510.1, 29012144950518.3, 34305093958882.3, 
36130169496521.6, 38221623366245, 42277066410473.2, 44074263806170.4, 
43065614437130.6, 42344345338240, 47429742792220.1, 53889760428024.4, 
63448640636975.8, 67287380731121.4, 65912251585506.6, 71093334728612.9, 
75986858424911.5, 71770877409023.9, 71485099232950.9, 70767556160329.4, 
70786293432652.9, 73403698511355.1)), .Names = c("Wavelength",  "Mean_Flux"), row.names = c(NA, -39L), class = c("tbl_df", "tbl",  "data.frame")), structure(list(Wavelength = c(297, 298, 299,  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312,  313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325,  326, 327, 328, 329, 330, 331, 332, 333, 334, 335), Mean_Flux = c(415506831223.724, 
714394289371.61, 1265107560445.88, 2034229344334.13, 3270851868692.03, 
5361224636143.47, 8958831478561.96, 12807617463343.5, 17006209162760.4, 
20575733539097.5, 27539627927144.3, 33190657526915.2, 35712780965107.8, 
43093802584092.1, 57086484715867.1, 64512835082552.1, 71078004042059.9, 
76988088838629.3, 80651447645443, 83387384453277.3, 98085877600768.4,  102231449647244, 107777014668132, 119426399476690, 123467890246235,  119788043869974, 117832629287271, 131435264029990, 149046662645431,  174675091816222, 184386770145488, 180228275791829, 194042071016215,  206195905093624, 195517624876122, 194014778014755, 190866480268480,  190535424697236, 196524281939270)), .Names = c("Wavelength",  "Mean_Flux"), row.names = c(NA, -39L), class = c("tbl_df", "tbl",  "data.frame")))), row.names = c(NA, -6L), class = c("grouped_df",  "tbl_df", "tbl", "data.frame"), vars = c("season", "Altitude" ), drop
= TRUE, indices = list(c(2L, 5L), 4L, c(0L, 1L, 3L)), group_sizes = c(2L,  1L, 3L), biggest_group_size = 3L, labels = structure(list(season = c(1,  3, 3), Altitude = c("UT", "LT", "MT")), row.names = c(NA, -3L ), class = "data.frame", vars = c("season", "Altitude"), drop = TRUE, .Names = c("season",  "Altitude")), .Names = c("season", "Altitude", "UTC_Time", "P",  "Temperature", "data"))
0

1 Answer 1

1

First, there is no need to call library(tidyverse) every time dummy.fn is called; it is a waste of computing resources, especially since the function you wrote does not use any functions from the tidyverse. The following will work.

# Sum of the `Mean_Flux` values in `data`, each weighted by the product
# Temperature * P, with the total rescaled by 1e-20.
#
# data:        object providing `Mean_Flux` through `$`
# Temperature: numeric weight (combined with P)
# P:           numeric weight (combined with Temperature)
dummy.fn <- function(data, Temperature, P) {
  tp_product <- Temperature * P
  flux_total <- sum(data$Mean_Flux * tp_product)
  flux_total * 1e-20
}

Here is the code using pmap_dbl from the purrr package to mimic your for loop. Notice that in your data element, there are only four data frames. To match the length of data, I subset the other two elements so that they also have a length of four.

library(tidyverse)

# Row-wise equivalent of the for loop: pmap_dbl() calls dummy.fn() once per
# position, matching the list names to the function's argument names.
# Temperature and P are trimmed to the length of the nested `data` list rather
# than to a hard-coded 1:4, because pmap_dbl() needs every input to have the
# same length (or length 1).
pmap_dbl(
  list(
    data = dummy.df$data,
    Temperature = dummy.df$Temperature[seq_along(dummy.df$data)],
    P = dummy.df$P[seq_along(dummy.df$data)]
  ),
  dummy.fn
)

# [1] 7.8224688 7.6239845 3.5399800 0.7506653
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.