#!/bin/bash -l

# Specify project
#$ -P scv  # replace with your own project_name

# Give the name to the job
#$ -N sra_example

# Send an email when the job is finished (or aborted)
#$ -m ae

# Join the error and output file
#$ -j y

# Specify qlog directory:
#$ -o ../qlog

# Request multiple cores:
# since fasterq-dump supports multithreading, we request 8 cores in this example
#$ -pe omp 8

# Now let's keep track of key job information:
echo "=========================================================="
echo "Starting on : $(date)"
echo "Running on node : $(hostname)"
echo "Current directory : $(pwd)"
echo "Current job ID : $JOB_ID"
echo "Current job name : $JOB_NAME"
echo "Number of cores: $NSLOTS"
echo "=========================================================="

# Define directories and make sure they exist.
# NOTE: the original used `if [!-e $DIR]` which is a syntax error (missing
# spaces), so the directories were never created. `mkdir -p` is idempotent
# and replaces the broken existence tests entirely.
DATA_DIR=../data
mkdir -p "$DATA_DIR"

OUT_DIR=../out
mkdir -p "$OUT_DIR"

QLOG_DIR=../qlog
mkdir -p "$QLOG_DIR"   # fixed typo: was testing $QLOT_DIR

IN_DIR=../in

# Specify the version
module load sratoolkit/2.11.1

# In this example, we demonstrate how to use the 'prefetch' and 'fasterq-dump'
# tools together to get the experiment data in a batch.
# The accession list is put in in/PRJNA525241_SRR_Acc_List.txt.
# Use $NSLOTS to make sure the command only uses the number of cores requested.
# Use $TMPDIR and $USER to specify a dedicated temporary folder for better tracking.
#
# The whole command must be quoted: the original unquoted assignment
# (CMD=prefetch --output-directory ...) only set CMD=prefetch in the
# environment and tried to execute '--output-directory' as a command.
CMD="prefetch --output-directory ${OUT_DIR}/sra --option-file ${IN_DIR}/PRJNA525241_SRR_Acc_List.txt"
echo "$CMD"
eval "$CMD"

# Make sure the per-user scratch directory exists before fasterq-dump uses it
# as its temporary workspace (-t).
mkdir -p "$TMPDIR/$USER"

# Run fasterq-dump to batch-convert the downloaded runs to FASTQ.
# NOTE(review): newer sratoolkit versions of prefetch may place each run in
# its own subdirectory (sra/SRRxxxx/SRRxxxx.sra); if this glob matches
# nothing, try ${OUT_DIR}/sra/*/*.sra — confirm against the installed version.
for file in "${OUT_DIR}"/sra/*.sra
do
    CMD="fasterq-dump \
        -p \
        -t $TMPDIR/$USER \
        --outdir ${OUT_DIR}/PRJNA525241 \
        --split-files \
        --threads $NSLOTS \
        ${file}"
    echo "$CMD"
    eval "$CMD"
done

# Print out end message:
echo "ALL DONE!"