# This example shows how to use the 'rslurm' package to parallelize a repeated
# calculation across the cores of one or more cluster nodes, and to re-import
# the results into the local R session
library(rslurm)
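# rslurm submits jobs by calling Slurm's sbatch command, so this script must
# run on a machine where Slurm is available; an optional sanity check
# (a suggested addition, not part of the original example):
# stopifnot(nzchar(Sys.which("sbatch")))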
# Simulate a 2D random walk of nsteps steps starting from (0, 0), where each
# step adds independent N(0, sigma) displacements to the x and y coordinates
rwalk <- function(nsteps, sigma) {
    rw_path <- matrix(NA_real_, nrow = nsteps, ncol = 2)
    rw_path[1, ] <- c(0, 0)
    for (i in 2:nsteps) {
        rw_path[i, ] <- rw_path[i - 1, ] + rnorm(2, 0, sigma)
    }
    rw_path
}
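# Equivalent vectorized version (a sketch, not part of the original example):
# the walk is just the cumulative sum of its Gaussian steps, so cumsum() can
# replace the explicit loop; assumes nsteps >= 2
rwalk_vec <- function(nsteps, sigma) {
    steps <- matrix(rnorm(2 * (nsteps - 1), 0, sigma), ncol = 2)
    rbind(c(0, 0), apply(steps, 2, cumsum))
}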
# Run a single random walk and return summary statistics: the net displacement
# and the range spanned along each axis
rw_stats <- function(nsteps, sigma) {
    path <- rwalk(nsteps, sigma)
    c(xdist = abs(path[1, 1] - path[nsteps, 1]),
      ydist = abs(path[1, 2] - path[nsteps, 2]),
      xspan = max(path[, 1]) - min(path[, 1]),
      yspan = max(path[, 2]) - min(path[, 2]))
}
# Generate 10000 random parameter sets; nsteps is used as a matrix dimension
# and loop bound, so draw it as a whole number rather than with runif()
nwalks <- 10000
params <- data.frame(nsteps = sample(100:1000, nwalks, replace = TRUE),
                     sigma = runif(nwalks, 0, 10))
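# Before submitting to the cluster, it is worth sanity-checking the function
# locally on a single parameter set (a suggested step, not in the original
# script); this should return a named vector of four statistics
do.call(rw_stats, params[1, ])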
# Use slurm_apply to parallelize the computation over 16 CPUs on a single
# cluster node; rwalk is called inside rw_stats, so it must be exported to
# the worker processes via global_objects
sjob <- slurm_apply(rw_stats, params, nodes = 1, cpus_per_node = 16,
                    global_objects = "rwalk")
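# Many clusters require extra scheduler options (partition, walltime, memory).
# slurm_apply accepts these through its slurm_options argument, a named list
# translated into #SBATCH flags; the option values below are illustrative
# placeholders, not part of the original example:
# sjob <- slurm_apply(rw_stats, params, nodes = 1, cpus_per_node = 16,
#                     global_objects = "rwalk",
#                     slurm_options = list(time = "1:00:00", partition = "normal"))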
# Check the status of the job in the Slurm queue, or indicate that it's done
get_job_status(sjob)
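# In a non-interactive script you may want to block until the job finishes.
# A minimal polling sketch, assuming the status object has a logical
# 'completed' element as in recent rslurm versions:
# while (!get_job_status(sjob)$completed) Sys.sleep(30)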
# Fetch the results from the output files and combine them into a single data
# frame, with one row of statistics per parameter set
results <- get_slurm_out(sjob, outtype = "table")
summary(results)
# Delete temporary files generated by rslurm
cleanup_files(sjob)