#!/bin/bash -l

# Example script for submitting Gaussian16 calculations and
# post-processing
# on BU SCC using the $TMPDIR to avoid network problems.
# Using the OpenMP parallelization of Gaussian
# and environment variables to set options matching script.

# Gaussian input file is expected in g16_omp.com

# Output directly from this script is in $JOB_ID-qsub.out,
# Output from Gaussian and additional information is in
# $JOB_ID.$INDEX_JOB/$JOB_ID.$INDEX_JOB-g16.log
# (INDEX_JOB is the array task ID, or 0 when the job is not an array job)

# This script circumvents the need for %nosave option, 
# Gaussian's well built deletion of scratch files:
# the job is executed in a scratch directory, only the important
# files are copied back. Besides avoiding network/backup/mirroring/indexing
# problems relating to scratch files, there is a smaller memory
# footprint in the directory containing the job submission and output.

# Many user specific qsub options such as -M user_email have been
# removed, see link for their description to add them back
# https://www.bu.edu/tech/support/research/system-usage/running-jobs/submitting-jobs/

# Being paranoid, the time limit is at 8 hours, feel free to adjust.
# Note that the time limit is for WALL time, changing the number
# of nslots does not change the time limit.


# Gaussian jobs should be run with the AVX option. 
# Gaussian utilities can NOT make use of multiple threads provided by MPI_N_tasks_per_node,
#  please be careful about changing the qsub options!
# Questions on exactly what this does should be directed to SCC help.

# Script collated by:
# Luke Nambi Mohanam, lmohanam<at>bu.edu

#$ -N g16_omp
#$ -pe omp 8
#$ -l h_rt=8:00:00
#$ -l mem_per_core=8G
#$ -l avx
#$ -cwd
#$ -j y


# Abort the script as soon as an unchecked command returns an error.
set -o errexit

# Record the submission directory (the working directory chosen by the
#  -cwd option). Keeping it in a variable is the simplest way to make sure
#   files are copied from, and back to, the correct path, and it pins the
#   path of the standard output file.
SUBMIT_DIR="${PWD}"

# Gaussian picks up its defaults from the environment variables exported
# below; the values mirror the qsub options requested above.
# Note utility programs have different options.
# NOTE(review): the variable names are reproduced exactly as found in this
#  script -- confirm them against the Gaussian documentation for the
#  installed version.

# Memory "available" for the main program.
## Chosen by hand: Gaussian uses slightly more than this amount and
## background tasks may need memory as well, so 0.5GB per core is held
## back from the 8G per core requested above: 8 cores x 7.5GB = 60GB.
GAUSS_MDEF=60GB
# Number of OMP 'slots', as granted by the scheduler.
GAUSS_PDEF=$NSLOTS
# Gaussian utilities get the same resources as the main program.
GAUSS_UDEF=$GAUSS_MDEF
GAUSS_MEMDEF=$GAUSS_MDEF
export GAUSS_PDEF GAUSS_MDEF GAUSS_UDEF GAUSS_MEMDEF

## The old checkpoint file variable is deliberately not used:
## an existing checkpoint file is copied to scratch as the new checkpoint
## file, which preserves the old one in the submission directory.
# export GAUSS_ICDEF=old.chk

# Checkpoint file.
GAUSS_YDEF=g16.chk
# Formatted checkpoint file; set by hand and must match $GAUSS_YDEF.
g16qsub_FCHK=g16.fchk
# RWF, the read-write file that contains the wave function.
GAUSS_ZDEF=g16.rwf
export GAUSS_YDEF g16qsub_FCHK GAUSS_ZDEF

# Pick the index used to name this job's outputs: the scheduler's task ID,
# except that the value "undefined" is treated as a non-array job and
# mapped to index 0.
if [[ "$SGE_TASK_ID" == "undefined" ]]; then
  INDEX_JOB=0
else
  INDEX_JOB=$SGE_TASK_ID
fi

# Create the output directory next to the submission files.
# The path is quoted so that a submission directory whose name contains
# spaces or glob characters is not split into several mkdir arguments.
OUTPUT_DIR="$PWD/$JOB_ID.$INDEX_JOB"
mkdir -- "$OUTPUT_DIR"
# Name of the main log file (Gaussian output plus this script's header).
LOG_FILE="$JOB_ID.$INDEX_JOB-g16.log"

# Function to always call upon script termination

#######################################
# Copy the files worth keeping from the scratch directory back to the
# output directory.
# Every copy is guarded on its own: a missing log file or the absence of
# .cub files only produces a warning, so one failed copy can no longer
# stop the remaining copies when the script runs with `set -e`.
# Globals:   TMPDIR, LOG_FILE, OUTPUT_DIR (read)
# Arguments: none
# Outputs:   warnings on stderr
# Returns:   status of the final `wait`
#######################################
function end_job_steps {
  local cub_file

  # Copy back individual files

  # Copy back output of Gaussian programs and this script's header
  if [[ -e "$TMPDIR/$LOG_FILE" ]]; then
    cp -- "$TMPDIR/$LOG_FILE" "$OUTPUT_DIR/." \
      || echo "WARNING: could not copy $LOG_FILE to $OUTPUT_DIR" >&2
  else
    echo "WARNING: $TMPDIR/$LOG_FILE does not exist, nothing copied" >&2
  fi

  # Copy back of main Gaussian input file:
  #  uncomment below if desired.
  #   cp -- "$TMPDIR/g16_omp.com" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-g16.com"
  # Copy back the (unformatted) checkpoint file:
  #  uncomment below if desired.
  #   cp -- "$TMPDIR/$GAUSS_YDEF" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-$GAUSS_YDEF"
  # Copy back the formatted checkpoint file:
  ### If sending to a different machine, copy back formatted checkpoint file
  ### Otherwise unformatted is smaller, copy back unformatted checkpoint file
  #  uncomment below if desired.
  #   cp -- "$TMPDIR/$g16qsub_FCHK" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-$g16qsub_FCHK"
  # Copy back the read-write file:
  #  uncomment below if desired.
  #   cp -- "$TMPDIR/$GAUSS_ZDEF" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-$GAUSS_ZDEF"

  # Copy any .cub files generated. When nothing matches, the glob stays
  # literal and is skipped by the existence test.
  for cub_file in "$TMPDIR"/*.cub; do
    [[ -e "$cub_file" ]] || continue
    cp -- "$cub_file" "$OUTPUT_DIR/." \
      || echo "WARNING: could not copy $cub_file to $OUTPUT_DIR" >&2
  done

  # Last resort copy everything:
  #   cp -- "$TMPDIR"/* "$OUTPUT_DIR/."

  wait
}

# Run the copy-back function on every exit of this script.
trap end_job_steps EXIT

# Check gaussian input file is correctly named.
if [[ ! -e g16_omp.com ]]; then
  echo "Gaussian input file does not exist!"
  exit 1
fi

# Check scratch directory TMPDIR for sufficient space.
# This does not reserve the disk space, so feel free to specify
# the size including temporary and intermediate files.
MIN_TMP_FREE_KB=150000000 # 150GB, expressed in KB
# -P keeps each file system on a single output line (long device names are
# otherwise wrapped, which shifts the columns) and -k forces 1K blocks, so
# the fourth field of the second line is always the free space in KB.
TMP_FREE_SPACE=$(df -Pk "$TMPDIR" | awk 'NR == 2 { print $4 }')
# An empty or non-numeric value would make the arithmetic test below a
# syntax error, which `if` would silently treat as "enough space".
if [[ ! "$TMP_FREE_SPACE" =~ ^[0-9]+$ ]]; then
  echo "could not determine free space on TMPDIR"
  exit 1
fi
if (( TMP_FREE_SPACE < MIN_TMP_FREE_KB )); then
  echo "not enough space on TMPDIR"
  exit 1
fi
echo "qsub output for Gaussian16 job"
echo ""
echo "SPACE ON TMPDIR:"
echo "$TMP_FREE_SPACE"


# cp is used instead of rsync to avoid extra scanning --
# failed copying can not be restarted!

# keep copy of this script in output directory
cat -- "$0" > "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB.qsub"
# populate scratch directory
## input file
cp -- "$SUBMIT_DIR/g16_omp.com" "$TMPDIR/."
## Locate a checkpoint file to restart from.
## A file with the default name ($GAUSS_YDEF) always takes precedence.
## Otherwise a single *.chk file is copied under the default name. The
## matches are collected in an array because testing or copying the bare
## glob fails as soon as it expands to more than one file.
if [[ -e "$GAUSS_YDEF" ]]; then
  cp -- "$GAUSS_YDEF" "$TMPDIR/$GAUSS_YDEF"
else
  chk_files=( *.chk )
  # With no match the array holds the literal pattern, hence the -e test.
  if (( ${#chk_files[@]} == 1 )) && [[ -e "${chk_files[0]}" ]]; then
    cp -- "${chk_files[0]}" "$TMPDIR/$GAUSS_YDEF"
  elif (( ${#chk_files[@]} > 1 )); then
    echo "WARNING: several .chk files found and none is named $GAUSS_YDEF;"
    echo "WARNING: no checkpoint file was copied to the scratch directory"
  fi
fi
wait
echo "Scratch input created!"

# Final check to standard output before run, can be removed.
# The expansion is quoted and guarded: an empty, unquoted $TMPDIR would make
# `cd` silently switch to $HOME and run the whole job there.
cd "${TMPDIR:?TMPDIR is not set}"
echo "tmp dir contents at start:"
ls -al
echo "Host Name: $HOSTNAME"
echo "Tmp Dir:   $TMPDIR"

# Header for the .log file that is the main output of the job.
# The group truncates the log once and writes all header lines to it.
{
  echo "g16 OMP qsub call with environment variables:"
  echo "Job ID:    $JOB_ID"
  echo "Task ID:   $SGE_TASK_ID"
  echo "Job Name:  $JOB_NAME"
  echo "Host Name: $HOSTNAME"
  echo "Submit Dir:   $SUBMIT_DIR"
  echo "Output Dir:   $OUTPUT_DIR"
  echo "Tmp Dir:   $TMPDIR"
} > "$LOG_FILE" 2>&1

# load gaussian and run calculation
# Note that SCC does not overwrite environment variables
# previously set.
module load gaussian/16.B.01
{
  echo "Gaussian module 16.B.01"
  echo ""
} >> "$LOG_FILE"

# Run the main Gaussian calculation; all of its output goes to the log file.
# The command is tested directly by `if`: with `set -e` active, a separate
# `$?` check on the following line is never reached after a failure, so the
# error message would never be printed.
echo "Starting Gaussian 16 Job!"
if ! g16 < "$TMPDIR/g16_omp.com" >> "$LOG_FILE" 2>&1; then
  echo "g16 executable encountered an error"
  exit 1
fi
echo "job ended normally"

# Convert the checkpoint file into a formatted checkpoint file.
echo "Creating Formatted output!"
if ! formchk "$TMPDIR/$GAUSS_YDEF" >> "$LOG_FILE" 2>&1; then
  echo "formchk encountered an error"
  exit 1
fi
echo "formchk ended normally"
# formatted checkpoint file is $g16qsub_FCHK

echo "Post processing!"

# call post processing programs below here.
# Examples are for a closed shell calculation.
# Ensure these files are copied back to the output directory!

## Generate cub file for the density, also set grid header that
## will be referenced by future files.
## The command is tested directly by `if`, because with `set -e` active a
## separate `$?` check would never be reached after a failure.
if ! cubegen "$GAUSS_PDEF" Density=SCF "$g16qsub_FCHK" "density.cub" -2 h >> "$LOG_FILE" 2>&1; then
  echo "Density printing by cubgen encountered an error"
  echo "will break formatting of subsequent cubgen calls"
  exit 1
fi
echo "first cubgen call ended normally"

## Find HOMO from FCHK: sixth field of the first "alpha electrons" line.
num_homo=$(grep "alpha electrons" "$g16qsub_FCHK" | head -n 1 | awk '{print $6}')
## Stop here if no usable orbital index was found; an empty value would
## otherwise be treated as 0 by the arithmetic below.
if [[ ! "$num_homo" =~ ^[0-9]+$ ]] || (( num_homo < 1 )); then
  echo "could not read the number of alpha electrons from $g16qsub_FCHK"
  exit 1
fi
## Generate cub file for the HOMO
cubegen "$GAUSS_PDEF" MO="$num_homo" "$g16qsub_FCHK" "HOMO.cub" -1 h "density.cub" >> "$LOG_FILE" 2>&1
## Set LUMO based on HOMO
num_lumo=$(( num_homo + 1 ))
## Generate cub file for the LUMO
cubegen "$GAUSS_PDEF" MO="$num_lumo" "$g16qsub_FCHK" "LUMO.cub" -1 h "density.cub" >> "$LOG_FILE" 2>&1
## Generate cub file for the HOMO-j where j is set by this loop.
## Orbital indices below 1 do not exist (small systems); they are skipped
## so that they cannot abort the job before the LUMO+k files are written.
for (( j = 1; j <= 3; j++ )); do
  mo_index=$(( num_homo - j ))
  if (( mo_index < 1 )); then
    echo "skipping HOMO-$j: no such orbital"
    continue
  fi
  cubegen "$GAUSS_PDEF" MO="$mo_index" "$g16qsub_FCHK" "HOMO-$j.cub" -1 h "density.cub" >> "$LOG_FILE" 2>&1
done
## Generate cub file for the LUMO+k where k is set by this loop
for (( k = 1; k <= 3; k++ )); do
  cubegen "$GAUSS_PDEF" MO="$(( num_lumo + k ))" "$g16qsub_FCHK" "LUMO+$k.cub" -1 h "density.cub" >> "$LOG_FILE" 2>&1
done

# Closing report on standard output; the directory listing helps to spot
# any error files left behind in the scratch directory.
printf '\n%s\n\n%s\n' "Job Done" "tmp dir contents at end:"
ls -la

# TMPDIR is deleted at the end of the calculation: anything that the
# function end_job_steps does not copy back is lost!
# qdel $JOB_ID will NOT cause the function to be called!