#!/bin/bash -l #Specify project #$ -P scv #replace with your own project_name #Give the name to the job #$ -N bowtie2_example #Send an email when the job is finished (or aborted) #$ -m ae #Join the error and output file #$ -j y #Specify qlog directory: #$ -o ../qlog #Request multiple cores: #since bowtie2 supports multithreading, we request 4 cores in this example #$ -pe omp 4 # Now let's keep track of key job information: echo "==========================================================" echo "Starting on : $(date)" echo "Running on node : $(hostname)" echo "Current directory : $(pwd)" echo "Current job ID : $JOB_ID" echo "Current job name : $JOB_NAME" echo "Number of cores: $NSLOTS" echo "==========================================================" # define variables: DATA_DIR=../data REF_DIR=../ref OUT_DIR=../out #Sepcify the version of bowtie module load bowtie2/2.4.2 module load samtools/1.10 module load bcftools/1.10.2 # run bowtie2-build to build index # output reference index in the same directory as where reference database file is # assign command string to CMD variable # index shall run only once: # check if index files already exists: if [ ! -e ${REF_DIR}/NC_012967.1.1.bt2 ]; then # build index for E. coli reference and put the index files in the same directory as reference sequence # in this example, we don't need to use -p to specify multithreads, since the index build process is stunning fast even with single core for this job. CMD="bowtie2-build ${REF_DIR}/NC_012967.1.fasta ${REF_DIR}/NC_012967.1" # print out $CMD for track/debug echo $CMD # executate command: eval $CMD else echo "${REF_DIR}/NC_012967.1 index already exists!" fi # now that we have built the index, we can go ahead call the aligner # the alignment takes some time around 10min to finish if run with single core # so we use $NSLOTS to pass as the number of threads to make sure the command only use the number of cores requested # incorporate JOB_NAME in the output file name to distinguish output accordingly CMD="bowtie2 -p $NSLOTS -t -x ${REF_DIR}/NC_012967.1 -1 ${DATA_DIR}/SRR030257_1.fastq -2 ${DATA_DIR}/SRR030257_2.fastq -S ${DATA_DIR}/${JOB_NAME}_SRR030257.sam" echo $CMD eval $CMD # following steps are beyond bowtie2, but rather from some typical post processing need using samtools/bcftools. for this reason, we module load samtools/ and bcftools ahead at the beginning of this job script. # Use samtools view to convert the SAM file into a BAM file. # BAM is the binary format corresponding to the SAM text format. # It's more storage efficient. CMD="samtools view -bS ${DATA_DIR}/${JOB_NAME}_SRR030257.sam > ${DATA_DIR}/${JOB_NAME}_SRR030257.bam" echo $CMD eval $CMD # Use samtools sort to convert the BAM file to a sorted BAM file. # sorted BAM is a useful format because the alignments are # (a) compressed, which is convenient for long-term storage, and # (b) sorted, which is conveneint for variant discovery. CMD="samtools sort ${DATA_DIR}/${JOB_NAME}_SRR030257.bam -o ${DATA_DIR}/${JOB_NAME}_SRR030257.sorted.bam" echo $CMD eval $CMD # To generate variant calls in VCF format, use bcftools mpileup (instead of # samtools mpileup, which is depreciated). Run against reference: CMD="bcftools mpileup -f ${REF_DIR}/NC_012967.1.fasta ${DATA_DIR}/${JOB_NAME}_SRR030257.sorted.bam | bcftools call -mv -Ob -o ${DATA_DIR}/${JOB_NAME}_SRR030257.raw.bcf" echo $CMD eval $CMD # Then to view the variants, run: CMD="bcftools view ${DATA_DIR}/${JOB_NAME}_SRR030257.raw.bcf" echo $CMD eval $CMD # print out end message: echo "DONE!"