High performance computing in R using doSNOW package

Note: This is the talk I gave to Statistical Computing Series, Department of Biostatistics at Vanderbilt University on October 10, 2014.


Opening

What’s a single criterion for a good R program?

library("plotrix")
plot(1:5,seq(1,10,length=5),type="n",xlab="",ylab="", xaxt='n', yaxt='n', xlim=c(-12,2), ylim=c(2,10))

# one loop
draw.circle(-10,5,0.2,border="purple",lty=1,lwd=3)
text(-10,8,"1 loop", cex=0.8)

# two loops
draw.circle(-8,5,0.2,border="purple",lty=1,lwd=3)
draw.circle(-7.6,5,0.3,border="red",lty=1,lwd=3)
text(-7.8,8,"2 loops", cex=0.8)

# three loops
draw.circle(-6,5,0.2,border="purple",lty=1,lwd=3)
draw.circle(-5.6,5,0.3,border="red",lty=1,lwd=3)
draw.circle(-5,5,0.4,border="green",lty=1,lwd=3)
text(-5.5,8,"3 loops", cex=0.8)

# five loops
draw.circle(-3+1,5,0.2,border="purple",lty=1,lwd=3)
draw.circle(-2.6+1,5,0.3,border="red",lty=1,lwd=3)
draw.circle(-2+1,5,0.4,border="green",lty=1,lwd=3)
draw.circle(-1.5+1,5,0.5,border="blue",lty=1,lwd=3)
draw.circle(-0.8+1,5,0.6,border="yellow",lty=1,lwd=3)
text(-2.5+1.5,8,"5 loops", cex=0.8)

text(-3.3, 6 ,"?", cex=1.5)

No “For” loops!


Parallel programming in R

  • What’s parallel computing?
    • Splitting the problem into pieces
    • Executing the pieces in parallel
    • Combining the results back together.
  • Difference in node, processor and core

1 Node = Multiple processors

1 Processor = Multiple cores

In my computer:

Number of Processors: 1 Total Number of Cores: 4

  • Parallel programming in R :

Packages: Snow, multicore, foreach, parallel, doMC, doSNOW

Multicore and doMC work only on a single node (and not on Windows).doMC and doSNOW are based on foreach package. doSNOW can be used in all platforms.


doSNOW package

Parallel programming with Foreach

  • Number of cores in your computer
library(parallel)
detectCores()
## [1] 8
# This number devided by 2 is the number of cores in your computer
  • Register clusters
library(doSNOW)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: snow
## 
## Attaching package: 'snow'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, clusterSplit, makeCluster,
##     parApply, parCapply, parLapply, parRapply, parSapply,
##     splitIndices, stopCluster
NumberOfCluster <- 4 
# how many jobs you want the computer to run at the same time

cl <- makeCluster(NumberOfCluster) # Make clusters
registerDoSNOW(cl) # use the above cluster
# your parallel programming code
# code
# code
stopCluster(cl) # close clusters
  • Review: For loop syntax

    for (i in vector){
      code
      return(object)
    }
  • Foreach loop syntax

    foreach (i=vector, .combine='fun') \%dopar\% {
      code
      return(object)
    }

By default, the results are returned in a list.

.combine = ‘fun’
fun = c : vector
fun = cbind: column bind
fun = rbind: row bind
fun = + : sum data
fun = * : multiple data
fun can be any function that you define.

x <- matrix(NA, ncol=5, nrow=5)
for (i in 1:5){
  x[i,] <- (1:5)^i
}
x
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    1    4    9   16   25
## [3,]    1    8   27   64  125
## [4,]    1   16   81  256  625
## [5,]    1   32  243 1024 3125
library(doSNOW)
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)
x0 <- foreach(i=1:5)%dopar%{
  y <-  (1:5)^i
  return(y) 
}
class(x0)
## [1] "list"
x <- foreach(i=1:5, .combine='rbind')%dopar%{
  y <-  (1:5)^i
  return(y) 
}
stopCluster(cl)
x
##          [,1] [,2] [,3] [,4] [,5]
## result.1    1    2    3    4    5
## result.2    1    4    9   16   25
## result.3    1    8   27   64  125
## result.4    1   16   81  256  625
## result.5    1   32  243 1024 3125
# which is equavilant to use .combine='fun' using the following function
fun <- function(...){
  rbind(...) 
}

cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)
x <- foreach(i=1:5, .combine='fun')%dopar%{
  y <-  (1:5)^i
  return(y) 
}
stopCluster(cl)
x
##          [,1] [,2] [,3] [,4] [,5]
## result.1    1    2    3    4    5
## result.2    1    4    9   16   25
## result.3    1    8   27   64  125
## result.4    1   16   81  256  625
## result.5    1   32  243 1024 3125
fun <- function(...){
  rbind(...) + 1
}

cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)
x1 <- foreach(i=1:5, .combine='fun')%dopar%{
  y <-  (1:5)^i
  return(y) 
}
stopCluster(cl)

x1
##          [,1] [,2] [,3] [,4] [,5]
## result.1    5    6    7    8    9
## result.2    5    8   13   20   29
## result.3    4   11   30   67  128
## result.4    3   18   83  258  627
## result.5    2   33  244 1025 3126
x
##          [,1] [,2] [,3] [,4] [,5]
## result.1    1    2    3    4    5
## result.2    1    4    9   16   25
## result.3    1    8   27   64  125
## result.4    1   16   81  256  625
## result.5    1   32  243 1024 3125
# be careful to use your own function, you need to take care of all results
  • When use foreach function, you need to put library(package) within the loop. It’s like you open multiple new R windows. Or you can use .packages option.
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)

# this won't work
#library(rms)
#x0 <- foreach(i=1:4, .combine='rbind')%dopar%{
#  y <- coef(ols( rnorm(10)~ rnorm(10)))
#  return(y) 
#}

x1 <- foreach(i=1:4, .combine='rbind', .packages='rms')%dopar%{
  y <- coef(ols( rnorm(10)~ rnorm(10)))
  return(y) 
}
x2 <- foreach(i=1:4, .combine='rbind')%dopar%{
  library(rms)
  y <- coef(ols( rnorm(10)~ rnorm(10)))
  return(y) 
}
stopCluster(cl)
cbind(x1,x2)
##            Intercept   Intercept
## result.1 -0.11754253  0.10164419
## result.2 -0.09717836 -0.05690018
## result.3 -0.11276330  0.31898100
## result.4  0.24010852  0.30242084
  • You don’t need to load data in each loop.
data <- data.frame(x1=rnorm(10), x2=rnorm(10), x3=rnorm(10))
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)
x <- foreach(i=1:10, .combine='c')%dopar%{
  y <- sum(data[i,])
  return(y)
}
stopCluster(cl)
x
##  [1] -0.27402641  0.02453019 -0.08458686 -2.67192952 -0.70532813
##  [6] -1.26668599 -0.31519526 -0.50867259  1.22288502 -2.45280143
  • There is no interaction between loops. Forexample, you can not use the result from \(j^{th}\) loop in \(j+1^{th}\) loop.
x <- 0
for (i in 1:10){
  y <- x[i]+i^i
  x <- c(x, y)
}
x
##  [1]           0           1           5          32         288
##  [6]        3413       50069      873612    17650828   405071317
## [11] 10405071317
# this won't work
x <- 0
cl <- makeCluster(5)
registerDoSNOW(cl)
x <- foreach(i=1:10)%dopar%{
  y <- x[i]+i^i
  x <- c(x, y)
  return(x)
}
stopCluster(cl)
x
## [[1]]
## [1] 0 1
## 
## [[2]]
## [1]  0 NA
## 
## [[3]]
## [1]  0 NA
## 
## [[4]]
## [1]  0 NA
## 
## [[5]]
## [1]  0 NA
## 
## [[6]]
## [1]  0  1 NA
## 
## [[7]]
## [1]  0  1 NA NA
## 
## [[8]]
## [1]  0  1 NA NA NA
## 
## [[9]]
## [1]  0  1 NA NA NA NA
## 
## [[10]]
## [1]  0  1 NA NA NA NA NA

Number of clusters and how much time you can save

pk_simulate <- function() {
  ret <- rnorm(1e6, 0,1)
  return(ret)
}

loops=32

n <- 1

# one simulation
t0 <- system.time({
  set.seed(n)
  pk_simulate()
})

# 32 simulations
t1 <- system.time({
  y <- replicate(loops, {
      set.seed(n)
      pk_simulate()
  })
})

# number of clusters to try
n.cluster <- c(2,4,8,10,16,32)

for (j in n.cluster){
  
  t2 <- system.time({
    cl <- makeCluster(j)
    registerDoSNOW(cl)
    t.snow <- snow.time({
      x <- foreach(i=1:loops) %dopar% {
      set.seed(n)
      return(pk_simulate())
    } 
    })
    stopCluster(cl)
  })
  
  # auto naming
  assign(paste0("t2.",j), t2)
  assign(paste0("t.snow.",j), t.snow)

}

rbind(t0, t1, t2.2, t2.4, t2.8, t2.10, t2.16, t2.32)
##       user.self sys.self elapsed user.child sys.child
## t0        0.130    0.003   0.132      0.000     0.000
## t1        3.403    0.241   3.650      0.000     0.000
## t2.2      0.367    0.274   2.342      0.003     0.002
## t2.4      0.477    0.348   1.943      0.005     0.005
## t2.8      0.513    0.346   2.596      0.010     0.009
## t2.10     0.497    0.340   3.056      0.012     0.011
## t2.16     0.475    0.336   4.061      0.020     0.018
## t2.32     0.433    0.308   7.300      0.041     0.037
#list(t0, t1, t2.2, t2.4, t2.8, t2.10, t2.16, t2.32,
#     t.snow.2, t.snow.4, t.snow.8, t.snow.10, t.snow.16,t.snow.32)

I always use two times the number of cores in the computer. For example, There are 4 cores in my computer. I will make 8 clusters. On Accre, they have 8 cores or 12 cores. You can use 16 or 24 clusters depending on the node.

plot(t.snow.8)

plot(t.snow.10)