#!/bin/bash -l

# Specify project
#$ -P scv  # replace with your own project_name

# Give the name to the job
#$ -N sra_example

# Send an email when the job is finished (or aborted)
#$ -m ae

# Join the error and output file
#$ -j y

# Specify qlog directory:
#$ -o ../qlog

# Request multiple cores:
# since fasterq-dump supports multithreading, we request 8 cores in this example
#$ -pe omp 8

# Now let's keep track of key job information:
echo "=========================================================="
echo "Starting on : $(date)"
echo "Running on node : $(hostname)"
echo "Current directory : $(pwd)"
echo "Current job ID : $JOB_ID"
echo "Current job name : $JOB_NAME"
echo "Number of cores: $NSLOTS"
echo "=========================================================="

# Define directories and make sure they exist.
# NOTE: the original used `if [!-e $DIR]` which is a syntax error (missing
# spaces), so the directories were never created. `mkdir -p` is idempotent
# and replaces the broken existence tests entirely.
DATA_DIR=../data
mkdir -p "$DATA_DIR"

OUT_DIR=../out
mkdir -p "$OUT_DIR"

QLOG_DIR=../qlog
mkdir -p "$QLOG_DIR"   # fixed typo: was testing $QLOT_DIR

IN_DIR=../in

# Specify the version
module load sratoolkit/2.11.1

# In this example, we demonstrate how to use the 'prefetch' and 'fasterq-dump'
# tools together to get the experiment data in a batch.
# The accession list is put in in/PRJNA525241_SRR_Acc_List.txt.
# Use $NSLOTS to make sure the command only uses the number of cores requested.
# Use $TMPDIR and $USER to specify a dedicated temporary folder for better tracking.
#
# The whole command must be quoted: the original unquoted assignment
# (CMD=prefetch --output-directory ...) only set CMD=prefetch in the
# environment and tried to execute '--output-directory' as a command.
CMD="prefetch --output-directory ${OUT_DIR}/sra --option-file ${IN_DIR}/PRJNA525241_SRR_Acc_List.txt"
echo "$CMD"
eval "$CMD"

# Make sure the per-user scratch directory exists before fasterq-dump uses it
# as its temporary workspace (-t).
mkdir -p "$TMPDIR/$USER"

# Run fasterq-dump to batch-convert the downloaded runs to FASTQ.
# NOTE(review): newer sratoolkit versions of prefetch may place each run in
# its own subdirectory (sra/SRRxxxx/SRRxxxx.sra); if this glob matches
# nothing, try ${OUT_DIR}/sra/*/*.sra — confirm against the installed version.
for file in "${OUT_DIR}"/sra/*.sra
do
    CMD="fasterq-dump \
        -p \
        -t $TMPDIR/$USER \
        --outdir ${OUT_DIR}/PRJNA525241 \
        --split-files \
        --threads $NSLOTS \
        ${file}"
    echo "$CMD"
    eval "$CMD"
done

# Print out end message:
echo "ALL DONE!"