# The environment variable TMPDIR is set when the job starts. It contains the
# path to the temporary directory created in the /scratch space for this job.
# Use Sys.getenv() to read the value of an environment variable:
tmpdir <- Sys.getenv("TMPDIR")
# Alternatively, users can create their own subdirectory, for example one named
# after their user ID. Note that this assignment replaces the TMPDIR value
# read above.
tmpdir <- paste0("/scratch/", Sys.getenv("USER"))
# Create the directory from within R rather than shelling out to `mkdir`:
# this is portable, is not affected by spaces or shell metacharacters in the
# path, and does not report an error when the directory already exists.
if (!dir.exists(tmpdir) && !dir.create(tmpdir, showWarnings = FALSE)) {
  warning("Could not create directory ", tmpdir, call. = FALSE)
}
# Simulate 100,000 weather-like observations to serve as sample output data:
# two normally distributed measurements and one 0/1 indicator.
t <- rnorm(n = 100000, mean = 50, sd = 10)
p <- rnorm(n = 100000, mean = 998, sd = 50)
prec <- rbinom(n = 100000, size = 1, prob = 0.2)
# Combine the three vectors into a single data frame, one column per variable
dt <- data.frame(
  temperature = t,
  pressure = p,
  precipitation = prec
)
# Measure the time it takes to write the whole dataset in one call, first to
# the project space (the current directory, accessed over the network) and
# then to the local scratch directory. The timings shown in the comments are
# sample output from one run; actual values will vary.
system.time(write.csv(dt, file = "_data.csv"))
#    user  system elapsed
#   0.481   0.006   0.729
system.time(write.csv(dt, file = file.path(tmpdir, "_data.csv")))
#    user  system elapsed
#   0.440   0.009   0.450
# Sometimes the data has to be written out one record at a time.
#
# out.dt() appends a single record to a comma-separated file, without a
# header line and without row names.
#   infile - destination passed to write.table() as `file`
#            (a file path, or a connection as used further below)
#   line   - the record to write, e.g. one row of a data frame
out.dt <- function(infile, line) {
  write.table(
    line,
    file = infile,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    append = TRUE
  )
}
# The most inefficient way: write one record at a time, appending it to the
# end of the file. Writing data this way involves four operations for every
# record to be written:
# 1. opening the file
# 2. moving to the end of the file
# 3. writing a line
# 4. closing the file
# The timings shown in the comments are sample output from one run.
infile <- "_data.csv"
system.time(for (i in seq_len(10000)) out.dt(infile, dt[i, ]))
#    user  system elapsed
#   6.194   2.179  15.146
# Writing the same records to the local scratch directory will be
# significantly faster. The timings shown in the comments are sample output
# from one run.
infile <- file.path(tmpdir, "_data.csv")
system.time(for (i in seq_len(10000)) out.dt(infile, dt[i, ]))
#    user  system elapsed
#   6.007   1.677   7.711
# The best way to output the data to a file one record at a time is to open
# the file once (preferably in the local scratch directory) and then write
# every record through that open connection, so the file is not reopened for
# each record. At the end the connection to the file needs to be closed.
# The timings shown in the comments are sample output from one run.
infile <- file.path(tmpdir, "_data.csv")
# open = "w" starts the file from scratch, discarding any earlier contents
file.conn <- file(infile, open = "w")
# `finally` guarantees the connection is closed even if a write fails,
# so an error in the loop does not leave the connection open.
tryCatch(
  system.time(for (i in seq_len(10000)) out.dt(file.conn, dt[i, ])),
  finally = close(file.conn)
)
#    user  system elapsed
#   5.278   1.472   6.771
# Clean-up: delete the demo files from both the project space and the local
# scratch directory (unlink() accepts a vector of paths).
demo.files <- c("_data.csv", paste0(tmpdir, "/_data.csv"))
unlink(demo.files)