#!/bin/bash -l

# Example batch job: build a bowtie2 index for a small test reference genome,
# then align paired-end reads against it with tophat.
#
# All paths below are relative, so they resolve against the directory the job
# runs in.

# Specify project (replace with your own project name):
#$ -P scv

# Give the job a name:
#$ -N tophat_example

# Send an email when the job is finished (or aborted):
#$ -m ae

# Join the error and output files:
#$ -j y

# Specify the qlog directory. Make sure this directory already exists before
# the job starts to run:
#$ -o ../qlog

# Request multiple cores. tophat supports multithreading (see the
# --num-threads option used below), so we request 4 cores in this example:
#$ -pe omp 4

# NOTE(review): `set -eu` is deliberately not enabled here; the `module`
# command is provided by the login environment and may not be safe to run
# under nounset/errexit -- confirm before enabling. Critical steps are checked
# explicitly instead.

#######################################
# Print an error message to stderr and abort the job.
# Arguments: $* - message text
# Returns:   does not return; exits with status 1
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Echo a command line (for tracking/debugging), then execute it.
# The command is run directly from its argument vector, so no `eval` and no
# re-parsing of the command string is involved.
# Arguments: $@ - command and its arguments
# Outputs:   the command line on stdout, followed by the command's own output
# Returns:   0 on success; aborts the job via die() if the command fails
#######################################
run_step() {
  echo "$*"
  "$@" || die "command failed (exit status $?): $1"
}

# NSLOTS is not assigned anywhere in this script; it is expected to come from
# the job environment. Fall back to a single thread when it is missing so the
# thread-count options below always receive a value.
NSLOTS=${NSLOTS:-1}

# Now let's keep track of key job information:
echo "=========================================================="
echo "Starting on : $(date)"
echo "Running on node : $(hostname)"
echo "Current directory : $(pwd)"
echo "Current job ID : $JOB_ID"
echo "Current job name : $JOB_NAME"
echo "Number of cores: $NSLOTS"
echo "=========================================================="

# Define variables:
DATA_DIR=../data
REF_DIR=../ref
OUT_DIR=../out

# Specify the versions of bowtie2 and tophat:
module load bowtie2/2.4.2
module load tophat/2.1.1

# Fail early, with a clear message, if the tools are still not available.
for tool in bowtie2-build tophat; do
  command -v "$tool" >/dev/null || die "'$tool' not found on PATH"
done

# Fail early if any input file is missing.
for input in "${REF_DIR}/test_ref.fa" \
             "${DATA_DIR}/reads_1.fq" \
             "${DATA_DIR}/reads_2.fq"; do
  [[ -f "$input" ]] || die "input file not found: $input"
done

# In this example, let's build everything from scratch: first remove any
# existing index, then rebuild it with the same bowtie2 used in the pipeline.
#
# The removal is unconditional. `rm -f` is silent when nothing matches, and a
# guard on a single fixed file name (e.g. test_ref.bt2) would never match the
# test_ref.*.bt2 files that are actually being removed.
rm -f -- "${REF_DIR}"/test_ref.*.bt2

# Build the index. This is not a good example for multithreading, since the
# reference genome is quite small and the build finishes quickly even on one
# core; '--threads' is used purely for demonstration. In a real scenario, with
# a much larger reference genome and read data, the multithread option is the
# ideal choice.
run_step bowtie2-build --threads "${NSLOTS}" \
  "${REF_DIR}/test_ref.fa" "${REF_DIR}/test_ref"
echo "Done with building index!"

# Now that the index is built, call tophat directly.
# The output is written to the ${OUT_DIR}/tophat_pe_qsub subdirectory; make
# sure its parent directory exists first.
# Note - the multithread option differs from one tool to another, so use '-h'
# or '--help' to check and verify the correct spelling.
mkdir -p -- "${OUT_DIR}" || die "cannot create output directory: ${OUT_DIR}"
run_step tophat --num-threads "${NSLOTS}" --no-coverage-search \
  -o "${OUT_DIR}/tophat_pe_qsub" \
  "${REF_DIR}/test_ref" \
  "${DATA_DIR}/reads_1.fq" "${DATA_DIR}/reads_2.fq"

# From here you can add many other post-processing steps to further look into
# the alignment results.

# Print out end message:
echo "DONE!"