10

I'm running the following script in R. If I use a %do% rather than a %dopar% the script works fine. However, if in the outer loop I use a %dopar% the loop runs forever without throwing any error (constant increase in memory usage until it goes out of memory). I'm using 16 cores.

library(parallel)
library(foreach)
library(doSNOW)
library(dplyr)


# Start 16 workers and register them as the backend used by %dopar%.
# NOTE(review): no stopCluster(cl) appears anywhere in this snippet.
NumberOfCluster <- 16 
cl <- makeCluster(NumberOfCluster) 
registerDoSNOW(cl) 


# Outer loop: one %dopar% task per element of UNSPSC_list. Each task builds a
# term-importance table for that code, scores every row of `train`, and saves
# the resulting vector to its own .rda file.
foreach(i = UNSPSC_list, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %dopar% 
    { 
      # Terms (spaces removed, de-duplicated) of the entries whose first six
      # UNSPSC characters equal i, wrapped in a data.table.
      terms <- as.data.table(unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(substr(terms_list_by_UNSPSC$UNSPSC,1,6) == i)])))) 
      # NOTE(review): dplyr joins name their key argument `by`, not `on` --
      # confirm that V1 is really the column being joined on.
      temp <- inner_join(N_of_UNSPSCs_by_Term, terms, on = 'V1') 
      # Importance is the reciprocal of V2; rows are sorted by it, largest first.
      temp$V2 <- 1/as.numeric(temp$V2)
      temp <- temp[order(temp$V2, decreasing = TRUE),]
      names(temp) <- c('Term','Imp')
      # First column of the UNSPSCs_per_ABN rows that share i's first four
      # characters, de-duplicated.
      ABNs <- unique(UNSPSCs_per_ABN[which(substr(UNSPSCs_per_ABN$UNSPSC,1,4) == substr(i,1,4)), 1])

      # Empty numeric vector; it is replaced by the result of the inner loop.
      predictions <- as.numeric(vector()) 
      # Inner loop: sequential (%do%) pass over row indices of `train`, with
      # the per-row values concatenated by c().
      # NOTE(review): seq_len(nrow(train)) is the safe form if `train` can
      # have zero rows.
      predictions <- foreach (j = seq(1 : nrow(train)), .combine = 'c', .packages = 'dplyr')  %do% 
      { 
        # Names of the non-NA entries in row j.
        descr <- names(which(!is.na(train[j,]) == TRUE)) 
        # Row j scores the summed importance of its matching terms when
        # predict_all[j, 1] is among ABNs or is absent from `suppliers`;
        # otherwise it scores 0.
        # NOTE(review): presumably `predictions` is still the empty vector
        # while this body runs, so union_all() just yields the new value --
        # verify.
        if(unlist(predict_all[j,1]) %in% unlist(ABNs) || !unlist(predict_all[j,1]) %in% unlist(suppliers)) {union_all(predictions, sum(temp$Imp[which(temp$Term %in% descr)]))} else {union_all(predictions, 0)}    

      } 
    # Write this code's vector to "Predictions_<i>_.rda".
    save(predictions, file = paste("Predictions", i,".rda", sep = "_")) 
    }
9
  • Did you try with NumberOfCluster <- 2? Commented Feb 6, 2018 at 7:23
  • Just tried and it does not solve the problem. Commented Feb 7, 2018 at 1:37
  • So without the inner %do% loop the code runs fine with %dopar%? Commented Feb 8, 2018 at 9:50
  • No the code runs fine only when both are %do%. If one of the two is %dopar% they do not work. Commented Feb 8, 2018 at 21:31
  • 2
    have you read this, you can nest loops with %:%: cran.r-project.org/web/packages/foreach/vignettes/nested.pdf ? Commented Feb 9, 2018 at 16:20

1 Answer 1

14
+25

The proper way to nest foreach loops is to use the %:% operator. See the example below; I have tested it on Windows.

library(foreach)
library(doSNOW)

# Start 4 workers and register them as the backend used by %dopar%.
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster) 
registerDoSNOW(cl) 

# Number of values drawn by every rnorm() call below.
N <- 1e6

# Baseline: the nested 10 x 10 loop evaluated with %do%. Inner results are
# combined with c(), outer results with rbind(); only the timing is reported.
system.time(foreach(i = 1:10, .combine = rbind) %:%
              foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))

# The same nested loop, this time dispatched with %dopar%.
system.time(foreach(i = 1:10, .combine = rbind) %:%
              foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))

Output:

> system.time(foreach(i = 1:10, .combine = rbind) %:%
+               foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
   user  system elapsed 
   7.38    0.23    7.64 
> system.time(foreach(i = 1:10, .combine = rbind) %:%
+               foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
   user  system elapsed 
   0.09    0.00    2.14 

CPU usage for %do% and %dopar%

The scheme for using nested loops is as follows:

foreach(i) %:% foreach(j) %dopar% {foo(i, j)}

The %:% operator is used to nest several foreach loops. You cannot do any computation between the nested foreach calls, so in your case you have to split the work into two loops, for example:

# First pass: a single parallel loop over i that collects 2^i into a vector
x <- foreach(i = seq_len(10), .combine = c) %dopar% {
  2^i
}

# Second pass: nested loops over i and j; row i holds x[i] + j for every j
foreach(i = seq_len(10), .combine = rbind) %:%
  foreach(j = seq_len(10), .combine = c) %dopar% {
    x[i] + j
  }

Untested code:

library(data.table)
library(foreach)
library(doSNOW)

NumberOfCluster <- 2
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)

# Step 1 (parallel over UNSPSC codes): build everything the scoring step needs
# for each code. Both the term-importance table (`temp`) and the ABN set are
# returned, because objects created inside one foreach body do not exist in a
# later loop. The result is a list in the same order as UNSPSC_list.
lookups <- foreach(i = UNSPSC_list, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %dopar% {
  terms <- as.data.table(unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(substr(terms_list_by_UNSPSC$UNSPSC, 1, 6) == i)]))))
  # dplyr joins take their key through `by`; `on` is not a dplyr argument.
  temp <- inner_join(N_of_UNSPSCs_by_Term, terms, by = 'V1')
  temp$V2 <- 1 / as.numeric(temp$V2)
  temp <- temp[order(temp$V2, decreasing = TRUE), ]
  names(temp) <- c('Term', 'Imp')
  list(
    temp = temp,
    ABNs = unique(UNSPSCs_per_ABN[which(substr(UNSPSCs_per_ABN$UNSPSC, 1, 4) == substr(i, 1, 4)), 1])
  )
}

# Step 2 (nested loop): the outer foreach hands each per-code lookup straight
# to the inner loop, so nothing has to be indexed by UNSPSC code. The inner
# loop scores every row of `train`; seq_len() keeps it empty when `train` has
# no rows. The result is one numeric vector per UNSPSC code.
predictions <- foreach(lookup = lookups, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %:%
  foreach(j = seq_len(nrow(train)), .combine = 'c', .packages = 'dplyr') %dopar% {
    descr <- names(which(!is.na(train[j, ])))
    supplier <- unlist(predict_all[j, 1])
    if (supplier %in% unlist(lookup$ABNs) || !supplier %in% unlist(suppliers)) {
      sum(lookup$temp$Imp[lookup$temp$Term %in% descr])
    } else {
      0
    }
  }

# Release the worker processes once all parallel work is finished.
stopCluster(cl)

# Step 3: write one file per UNSPSC code. save() needs the *name* of an
# object, so each vector is bound to `predictions` in a scratch environment;
# every file then holds an object called `predictions` and is named after its
# UNSPSC code, as in the original script.
for (k in seq_along(predictions)) {
  out <- new.env()
  assign("predictions", predictions[[k]], envir = out)
  save(list = "predictions", envir = out,
       file = paste("Predictions", UNSPSC_list[[k]], ".rda", sep = "_"))
}
Sign up to request clarification or add additional context in comments.

1 Comment

Thanks djhurio, I thought of using %:% the fact is that my code is structured as foreach - code - nested foreach. yours is just foreach - nested foreach. If you show me how to adapt my code to your solution the 50 points are yours.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.