2

I have a dataframe df

    structure(list(ID = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 
9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 
13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 
17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 20L, 20L, 20L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 57L, 57L, 62L, 62L, 62L, 70L, 70L, 70L, 
70L, 70L, 2L, 2L, 2L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 46L, 
46L, 46L, 46L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L), .Label = c("AU-Tum", 
"BE-Bra", "BR-Sa3", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", 
"CA-NS1", "CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", 
"CA-Oas", "CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", 
"CA-SF3", "CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", 
"CZ-Bk1", "DE-Har", "DE-Wet", "DK-Sor", "FI-Hyy", "FR-Hes", "FR-Pue", 
"ID-Pag", "IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef", "NL-Loo", 
"SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1", "UK-Gri", 
"US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf", 
"US-Ha1", "US-Ha2", "US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", 
"US-Nc2", "US-NR1", "US-Sp1", "US-Sp2", "US-Sp3", "US-Umb", "US-Wcr", 
"US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8"), class = "factor"), 
    x = c(156, 157, 160, 162, 163, 164, 165, 153, 154, 155, 71, 
    72, 73, 74, 37, 38, 39, 40, 41, 39, 40, 22, 23, 24, 12, 13, 
    14, 15, 16, 4, 5, 6, 7, 74, 75, 76, 77, 78, 79, 80, 81, 82, 
    126, 127, 128, 129, 130, 131, 132, 71, 72, 73, 74, 75, 76, 
    99, 100, 101, 49, 50, 51, 52, 53, 54, 56, 9, 10, 46, 47, 
    48, 84, 85, 86, 87, 88, 77, 78, 79, 101, 105, 106, 107, 108, 
    109, 110, 81, 82, 84, 88, 131, 132, 133, 134, 135, 136, 137, 
    138, 139), y = c(50.0472381226718, 706.825824817992, 729.621982051409, 
    593.225827791495, 685.154353165934, 574.088067465695, 650.30821636616, 
    494.185166497016, 436.312162090908, 631.891738044098, 280.949480787385, 
    641.231373456365, 412.116433330579, 416.824746264203, 415.905685925856, 
    494.374217984441, 201.745910386788, 486.030122926459, 647.782697262242, 
    389.839577941515, 256.552344558528, 605.790549736819, 483.045965372879, 
    668.017897433514, 35.2706101682852, 265.693628564011, 285.116345260642, 
    291.023782284086, 357.428790589795, 205.920375034591, 229.606221692753, 
    230.952761338012, 241.641164634028, 1089.06303295676, 1255.88808925333, 
    1087.75402177912, 1068.248897182, 1212.17254891642, 884.222588171535, 
    938.887718005513, 863.582247020735, 1065.91969416523, 902.338968377328, 
    790.570635510725, 834.500908313203, 710.755041345197, 814.002362551197, 
    726.814950022846, 828.559687148314, 611.564698476112, 603.238720579422, 
    524.322001078981, 565.296378873638, 532.431853589369, 597.174114277044, 
    606.075737104722, 686.408686154056, 705.914347674276, 1858.98340779543, 
    1893.38468471169, 1819.83262739703, 1827.31409981102, 1640.5816780664, 
    1689.0365549922, 2112.67917439342, 479.374777290737, 326.663507855032, 
    1184.81825619942, 1281.2920902365, 1269.12480160726, 1265.48484068702, 
    1193.29000986667, 1156.81486114406, 1199.7373066445, 1116.24029749935, 
    1100.47051284742, 1072.57190890331, 1228.25697739795, 1576.32775748242, 
    1631.14609672129, 1796.87265141308, 1712.90461264737, 1844.87409764528, 
    1938.56225809082, 1663.52108450048, 1626.12740687071, 1333.52924151719, 
    1349.01338642137, 1376.41668179166, 1362.32371946308, 1317.75608457439, 
    1519.12511487596, 1558.26111694807, 1588.8933303128, 1624.50100837374, 
    1433.10019567201, 1371.01498340943, 1439.94849821774)), .Names = c("ID", 
"x", "y"), row.names = c(290L, 291L, 292L, 293L, 294L, 295L, 
296L, 297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 305L, 306L, 
307L, 308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L, 317L, 
318L, 319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L, 328L, 
329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 
340L, 341L, 342L, 343L, 344L, 351L, 352L, 353L, 424L, 425L, 426L, 
427L, 428L, 429L, 430L, 471L, 472L, 493L, 494L, 495L, 512L, 513L, 
514L, 515L, 516L, 266L, 267L, 268L, 438L, 439L, 440L, 441L, 442L, 
443L, 444L, 451L, 452L, 453L, 454L, 484L, 485L, 486L, 487L, 488L, 
489L, 490L, 491L, 492L), class = "data.frame")

I wanted to compute a the model efficiency in a cross validation leave one subject out mode on this dataframe by using one non-linear function, which I have implemented as such:

library(stats)
library (hydroGOF)

Out <- c()
id <- unique(df$ID)
for (i in id){
  fit1 <- try(nls(y~A*x^3+B*x^2+C*x+D, data = df[df$ID != i,], start = list(A=0.02, B=-0.6, C= 50, D=200)), silent=TRUE)
  Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=df[df$ID==i,])
}

MEF<- NSE(Out, df$y)

However, I would like to create a function out of it in order to apply it in n dataframe with the same structure but by also including two non-linear functions in the loop. I have started to implement this line of codes but without success.

stat <- function(dat) {
  id <- unique(dat$ID)
  Out<-c()
  Out2<-c()
  for (i in id){
    fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
    Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
    Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
c(Out, Out2)
}}

df.list<-list(df) # Here I put only one dataframe but it will be more than one. 

res<-lapply(df.list, stat) 

Anyone could solve my issue?

2 Answers 2

2

I didn't get if you want it to output the NSE result or just the Out and Out2 so I included two variations.

First of all I think your function needs to list Out and Out2 instead of using c which will concatenate the two.

Then you can do:

stat <- function(dat) {
  id <- unique(dat$ID)
  Out<-c()
  Out2<-c()
  for (i in id){
    fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
    Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
    fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
    Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
  }

  list(Out, Out2)
  #list(NSE(Out, dat$y), NSE(Out2, dat$y))
}

Which will output just Out and Out2 on which then you could use NSE:

df.list<-list(df) # Here I put only one dataframe but it will be more than one. 

res<-lapply(df.list, stat) 
> res
[[1]]
[[1]][[1]]
 [1] 1293.19052 1296.35786 1291.80408 1290.99605 1298.77649 1288.68723 1280.85556 1274.29969   73.09179  137.39803  199.57234  253.22724  306.64911  767.04455
[15]  805.64571  830.52502  852.82856  392.31343  392.76582  381.99857  471.69255  468.30934 1213.32652 1318.90282 1336.20432 1331.59450 1093.61066  776.19050
[29]  816.66237  855.65923  347.74230  411.60219 1187.15420 1175.01320 1169.16425 1160.33162 1148.21352 1145.15265 1134.55927 1126.44445 1112.65676  667.97854
[43]  656.19839  654.13085  639.01529  635.08903  620.08683 1222.46503 1214.07093 1206.90274 1197.26398 1188.93353 1178.60658  142.68868  209.94101  278.51757
[57]  339.27000  401.06092  952.40556  939.72216  928.89347  627.67858  671.42009  217.45400 1229.16628 1303.97471 1305.00665 1303.10220 1306.25091 1302.15214
[71] 1284.16542 1268.00061 1252.26307 1241.32474 1236.48697 1234.85012 1273.66465 1283.02331 1305.82044 1316.90994  137.40424 1326.03716 1323.69234  896.51028
[85]  936.55168  945.38984  937.21832  919.84053  909.75825 1088.71144 1080.18786 1070.87042 1059.73379 1051.40485 1239.73625 1215.11176 1193.85910

[[1]][[2]]
 [1] 1293.19052 1296.35786 1291.80408 1290.99605 1298.77649 1288.68723 1280.85556 1274.29969   73.09179  137.39803  199.57234  253.22724  306.64911  767.04455
[15]  805.64571  830.52502  852.82856  392.31343  392.76582  381.99857  471.69255  468.30934 1213.32652 1318.90282 1336.20432 1331.59450 1093.61066  776.19050
[29]  816.66237  855.65923  347.74230  411.60219 1187.15420 1175.01320 1169.16425 1160.33162 1148.21352 1145.15265 1134.55927 1126.44445 1112.65676  667.97854
[43]  656.19839  654.13085  639.01529  635.08903  620.08683 1222.46503 1214.07093 1206.90274 1197.26398 1188.93353 1178.60658  142.68868  209.94101  278.51757
[57]  339.27000  401.06092  952.40556  939.72216  928.89347  627.67858  671.42009  217.45400 1229.16628 1303.97471 1305.00665 1303.10220 1306.25091 1302.15214
[71] 1284.16542 1268.00061 1252.26307 1241.32474 1236.48697 1234.85012 1273.66465 1283.02331 1305.82044 1316.90994  137.40424 1326.03716 1323.69234  896.51028
[85]  936.55168  945.38984  937.21832  919.84053  909.75825 1088.71144 1080.18786 1070.87042 1059.73379 1051.40485 1239.73625 1215.11176 1193.85910

Or use:

stat <- function(dat) {
  id <- unique(dat$ID)
  Out<-c()
  Out2<-c()
  for (i in id){
    fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
    Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
    fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
    Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
  }

  #list(Out, Out2)
  c(NSE(Out, dat$y), NSE(Out2, dat$y))
}

If you want it to output the NSEs instead:

df.list<-list(df) # Here I put only one dataframe but it will be more than one. 

res<-lapply(df.list, stat) 
> res
[[1]]
[1] 0.3125795 0.3125795

Edit for answering the comment:

stat <- function(dat) {
  id <- nrow(dat)
  Out<-c()
  Out2<-c()
  for (i in 1:id){
    fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[-i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
    Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[i,]) else NA;
    fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[-i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
    Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[i,]) else NA;
  }

  #list(Out, Out2)
  c(NSE(Out, dat$y), NSE(Out2, dat$y))
}


> stat(df)
[1] 0.2571609 0.2571609

Edit2:

stat <- function(dat) {
  id <- unique(dat$ID)
  NSEs<-list()
  for (i in id){
    fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
    Out <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
    fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
    Out2 <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
    NSEs[[length(NSEs)+1]] <- c(NSE(Out, dat$y[dat$ID == i]), NSE(Out2, dat$y[dat$ID == i]))
  }
  NSEs
}


> stat(df)
[[1]]
[1] -4.218322 -4.218322

[[2]]
[1] -27.30966 -27.30966

[[3]]
[1] -35.98506 -35.98506

[[4]]
[1] -10.89336 -10.89336

[[5]]
[1] -73.49176 -73.49176
#and so on...
Sign up to request clarification or add additional context in comments.

22 Comments

Thamks a lot. I indeed wanted the output of the NSE. Great job!
@LyzanderR. In my real data, I have data points with identical IDs. It seems that the code you provided give only one predicted value per ID and not one predicted value per row. Do you know why?
Yeah of course it does that since for example dat[dat$ID != i,] will return more than one rows if i has duplicate IDs. In case you want to do it for each row I suggest you use id <- nrow(dat) instead of id <- unique(dat$ID) and then do for (i in 1:id) instead of for (i in id) (all else stays the same) to do the calculation for each row.
Alright but in the out object, there are only NA values. Do you know why?
Alright thanks anyway. I don't know what is happening with the real data because they have the same kind of structure than the example.
|
0
output <- sapply(
  seq_len(nrow(df)), 
  function(i){
    model <- list(
      try(
        nls(
          y ~ A * (x ^ B) * (exp(k * x)), 
          data = df[-i,], 
          start = list(A = 1000, B = 0.170, k = -0.00295)
        ), 
        silent=TRUE
      ),
      try(
        nls(
          y ~ A * x ^ 2 + B * x + C, 
          data = df[-i,], 
          start = list(A = -0.4, B = 50, C= 300)
        ), 
        silent=TRUE
      )
    )
    sapply(model, function(x){
      ifelse(
        class(x) == "try-error",
        NA,
        predict(x, newdata = df[i, ])
      )
    })
  }
)

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.