#!/bin/bash -l
#
# Example script for submitting Gaussian16 calculations and post-processing
# on BU SCC, using $TMPDIR to avoid network problems.
# Uses the OpenMP parallelization of Gaussian and environment variables
# to set Gaussian options that match the scheduler options in this script.
#
# Input:
#   The Gaussian input file is expected in g16_omp.com in the submission
#   directory. An existing checkpoint file (g16.chk, or a single *.chk) in
#   the submission directory is copied to scratch as the new checkpoint.
#
# Output:
#   Standard output and standard error of this script are merged (-j y)
#   into the scheduler's job output file.
#   Output from Gaussian and additional information is in
#   $JOB_ID.$INDEX_JOB/$JOB_ID.$INDEX_JOB-g16.log
#
# This script circumvents the need for the %nosave option, Gaussian's
# well built deletion of scratch files: the job is executed in a scratch
# directory and only the important files are copied back. Besides avoiding
# network/backup/mirroring/indexing problems relating to scratch files,
# there is a smaller footprint in the directory containing the job
# submission and output.
#
# Many user specific qsub options such as -M user_email have been
# removed, see the link for their description to add them back:
# https://www.bu.edu/tech/support/research/system-usage/running-jobs/submitting-jobs/
#
# Being paranoid, the time limit is at 8 hours, feel free to adjust.
# Note that the time limit is for WALL time, changing the number
# of nslots does not change the time limit.
# Gaussian jobs should be run with the AVX option.
# Gaussian utilities can NOT make use of multiple threads provided by
# MPI_N_tasks_per_node, please be careful about changing the qsub options!
# Questions on exactly what this does should be directed to SCC help.
#
# Script collated by:
# Luke Nambi Mohanam, lmohanambu.edu

#$ -N g16_omp
#$ -pe omp 8
#$ -l h_rt=8:00:00
#$ -l mem_per_core=8G
#$ -l avx
#$ -cwd
#$ -j y

# Exit as soon as an unchecked command fails.
# Commands whose failure needs a specific message are tested with
# "if ! cmd" below; checking "$?" afterwards would never be reached.
set -e

# The whole script depends on the scheduler-provided scratch directory.
# Without it, "cd $TMPDIR" would silently land in $HOME and the copy-back
# globs would expand against "/".
: "${TMPDIR:?TMPDIR must be set (normally provided by the scheduler)}"

# Identify submission directory, matching the -cwd option.
# This is the simplest way to ensure files are copied from and back to
# the correct path. It also locks the standard output file path.
SUBMIT_DIR=$PWD

# Name of the Gaussian input file expected in the submission directory.
readonly INPUT_FILE=g16_omp.com

# Minimum free space required on $TMPDIR, in KB (150GB).
readonly MIN_TMP_FREE_KB=150000000

# ---------------------------------------------------------------------------
# Environment variables for Gaussian matching the scheduler options above.
# Note that utility programs have different options.
# ---------------------------------------------------------------------------

# Number of OMP 'slots'
export GAUSS_PDEF=$NSLOTS

# Memory "available" for the main program.
## This is set manually as Gaussian will use slightly more than
## this amount and there are background tasks that might use
## more memory as well.
## This script sacrifices 0.5GB of memory per core (8 cores x 8G requested)
## to give a total of 60GB.
export GAUSS_MDEF=60GB

# Set Gaussian utilities to use the same resources as the main program
export GAUSS_UDEF=$GAUSS_MDEF
export GAUSS_MEMDEF=$GAUSS_MDEF

## The old checkpoint file variable is not used,
## the old file is automatically copied to scratch as the new checkpoint
## file, preserving the old checkpoint file in the submission directory.
# export GAUSS_ICDEF=old.chk

# checkpoint file
export GAUSS_YDEF=g16.chk

# Manually set the formatted checkpoint file name to match $GAUSS_YDEF
export g16qsub_FCHK=g16.fchk

# RWF, read write file, that contains the wave function
export GAUSS_ZDEF=g16.rwf

# Array jobs get their task id as index; plain jobs report "undefined"
# (also treat an unset variable as a plain job).
if [[ "${SGE_TASK_ID:-undefined}" == "undefined" ]]; then
  INDEX_JOB=0
else
  INDEX_JOB=$SGE_TASK_ID
fi

# create output directory
OUTPUT_DIR=$PWD/$JOB_ID.$INDEX_JOB
mkdir -- "$OUTPUT_DIR"

LOG_FILE=$JOB_ID.$INDEX_JOB-g16.log

#######################################
# Copy results from scratch back to the output directory.
# Always called upon script termination (EXIT trap).
# Globals:   TMPDIR, OUTPUT_DIR, LOG_FILE (read)
# Arguments: none
# Outputs:   files in $OUTPUT_DIR; cp diagnostics on stderr
#######################################
end_job_steps() {
  # Best effort from here on: the script may be exiting early (no log yet,
  # no cube files yet) and one failed copy must not prevent the others.
  set +e
  local cub_file

  # Copy back output of Gaussian programs and this script's header
  if [[ -e "$TMPDIR/$LOG_FILE" ]]; then
    cp -- "$TMPDIR/$LOG_FILE" "$OUTPUT_DIR/."
  fi

  # Copy back the main Gaussian input file:
  # uncomment below if desired.
  # cp -- "$TMPDIR/g16_omp.com" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-g16.com"

  # Copy back the (unformatted) checkpoint file:
  # uncomment below if desired.
  # cp -- "$TMPDIR/$GAUSS_YDEF" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-$GAUSS_YDEF"

  # Copy back the formatted checkpoint file:
  ### If sending to a different machine, copy back the formatted checkpoint
  ### file. Otherwise the unformatted one is smaller, copy back that instead.
  # uncomment below if desired.
  # cp -- "$TMPDIR/$g16qsub_FCHK" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-$g16qsub_FCHK"

  # Copy back the read-write file:
  # uncomment below if desired.
  # cp -- "$TMPDIR/$GAUSS_ZDEF" "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB-$GAUSS_ZDEF"

  # Copy any .cub files generated. The existence test covers the case
  # where the glob matched nothing and was left as a literal pattern.
  for cub_file in "$TMPDIR"/*.cub; do
    if [[ -e "$cub_file" ]]; then
      cp -- "$cub_file" "$OUTPUT_DIR/."
    fi
  done

  # Last resort copy everything:
  # cp -- "$TMPDIR"/* "$OUTPUT_DIR/."
}
trap end_job_steps EXIT

#######################################
# Write the cube file of one molecular orbital on the grid of density.cub.
# Globals:   GAUSS_PDEF, g16qsub_FCHK, LOG_FILE (read)
# Arguments: $1 - orbital index (1-based)
#            $2 - name of the cube file to create
# Outputs:   cube file in the current directory; cubegen output appended
#            to $LOG_FILE
# Returns:   exits the script with status 1 if cubegen fails
#######################################
make_mo_cube() {
  local mo_index=$1
  local cube_name=$2

  if ! cubegen "$GAUSS_PDEF" "MO=$mo_index" "$g16qsub_FCHK" "$cube_name" \
    -1 h "density.cub" >> "$LOG_FILE" 2>&1; then
    echo "cubegen encountered an error writing $cube_name"
    exit 1
  fi
}

# Check gaussian input file is correctly named.
if [[ ! -e "$SUBMIT_DIR/$INPUT_FILE" ]]; then
  echo "Gaussian input file does not exist!"
  exit 1
fi

# Check scratch directory TMPDIR for sufficient space.
# This does not reserve the disk space, so feel free to specify
# the size including temporary and intermediate files.
# -P keeps every filesystem on a single line (long device names would
# otherwise wrap and shift the columns), -k forces KB units.
TMP_FREE_SPACE=$(df -Pk "$TMPDIR" | awk 'NR == 2 {print $4}')
if [[ ! "$TMP_FREE_SPACE" =~ ^[0-9]+$ ]]; then
  echo "could not determine free space on TMPDIR"
  exit 1
fi
if (( TMP_FREE_SPACE < MIN_TMP_FREE_KB )); then
  echo "not enough space on TMPDIR"
  exit 1
fi

echo "qsub output for Gaussian16 job"
echo ""
echo "SPACE ON TMPDIR:"
echo "$TMP_FREE_SPACE"

# cp is used instead of rsync to avoid extra scanning --
# failed copying can not be restarted!

# keep copy of this script in output directory
cat -- "$0" > "$OUTPUT_DIR/$JOB_ID.$INDEX_JOB.qsub"

# populate scratch directory
## input file
cp -- "$SUBMIT_DIR/$INPUT_FILE" "$TMPDIR/."

## Locate checkpoint file. The default name takes precedence over any
## other *.chk file present in the submission directory.
if [[ -e "$SUBMIT_DIR/$GAUSS_YDEF" ]]; then
  cp -- "$SUBMIT_DIR/$GAUSS_YDEF" "$TMPDIR/$GAUSS_YDEF"
else
  chk_candidates=()
  for chk_file in "$SUBMIT_DIR"/*.chk; do
    if [[ -e "$chk_file" ]]; then
      chk_candidates+=("$chk_file")
    fi
  done
  case ${#chk_candidates[@]} in
    0)
      # No checkpoint available: Gaussian starts from scratch.
      ;;
    1)
      cp -- "${chk_candidates[0]}" "$TMPDIR/$GAUSS_YDEF"
      ;;
    *)
      # Ambiguous: do not guess which checkpoint the user meant.
      echo "multiple .chk files found and none is named $GAUSS_YDEF;" \
        "continuing without a checkpoint file" >&2
      ;;
  esac
fi

echo "Scratch input created!"

# Final check to standard output before run, can be removed.
cd -- "$TMPDIR"
echo "tmp dir contents at start:"
ls -al
echo "Host Name: $HOSTNAME"
echo "Tmp Dir: $TMPDIR"

# header for .log file that is the main output of job.
{
  echo "g16 OMP qsub call with environment variables:"
  echo "Job ID: $JOB_ID"
  echo "Task ID: $SGE_TASK_ID"
  echo "Job Name: $JOB_NAME"
  echo "Host Name: $HOSTNAME"
  echo "Submit Dir: $SUBMIT_DIR"
  echo "Output Dir: $OUTPUT_DIR"
  echo "Tmp Dir: $TMPDIR"
} > "$LOG_FILE" 2>&1

# load gaussian and run calculation
# Note that SCC does not overwrite environment variables
# previously set.
module load gaussian/16.B.01
{
  echo "Gaussian module 16.B.01"
  echo ""
} >> "$LOG_FILE"

echo "Starting Gaussian 16 Job!"
if ! g16 < "$TMPDIR/$INPUT_FILE" >> "$LOG_FILE" 2>&1; then
  echo "g16 executable encountered an error"
  exit 1
fi
echo "job ended normally"

echo "Creating Formatted output!"
if ! formchk "$TMPDIR/$GAUSS_YDEF" >> "$LOG_FILE" 2>&1; then
  echo "formchk encountered an error"
  exit 1
fi
echo "formchk ended normally"
# formatted checkpoint file is $g16qsub_FCHK

echo "Post processing!"
# call post processing programs below here.
# Examples are for a closed shell calculation.
# Ensure these files are copied back to the output directory!

## Generate cub file for the density, also set grid header that
## will be referenced by future files
if ! cubegen "$GAUSS_PDEF" Density=SCF "$g16qsub_FCHK" "density.cub" -2 h \
  >> "$LOG_FILE" 2>&1; then
  echo "Density printing by cubgen encountered an error"
  echo "will break formatting of subsequent cubgen calls"
  exit 1
fi
echo "first cubgen call ended normally"

## Find HOMO from FCHK: 6th field of the first "alpha electrons" line
num_homo=$(awk '/alpha electrons/ {print $6; exit}' "$g16qsub_FCHK")
if [[ ! "$num_homo" =~ ^[0-9]+$ ]] || (( num_homo < 1 )); then
  echo "could not read the number of alpha electrons from $g16qsub_FCHK"
  exit 1
fi

## Generate cub file for the HOMO
make_mo_cube "$num_homo" "HOMO.cub"

## Set LUMO based on HOMO
num_lumo=$(( num_homo + 1 ))

## Generate cub file for the LUMO
make_mo_cube "$num_lumo" "LUMO.cub"

## Generate cub file for the HOMO-j where j is set by this loop
for (( j = 1; j <= 3; j++ )); do
  if (( num_homo - j < 1 )); then
    # Fewer occupied orbitals than requested: nothing below orbital 1.
    echo "skipping HOMO-$j.cub, orbital does not exist"
    continue
  fi
  make_mo_cube "$(( num_homo - j ))" "HOMO-$j.cub"
done

## Generate cub file for the LUMO+k where k is set by this loop
for (( k = 1; k <= 3; k++ )); do
  make_mo_cube "$(( num_lumo + k ))" "LUMO+$k.cub"
done

# some last output to standard output to catch any error files.
echo ""
echo "Job Done"
echo ""
echo "tmp dir contents at end:"
ls -al

# TMPDIR is deleted at the end of the calculation, all
# information not copied in function end_job_steps is lost!
# qdel $JOB_ID will NOT cause the function to be called!