[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## Job Name
#SBATCH --job-name=re-duck
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=30-00:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/sr320/1217/



# ---- Tool and data locations shared by every step of this job ----
# bowtie2_dir, reads_dir and genome_folder end in "/"; reads_dir is used as a
# prefix that file-name patterns are appended to directly.
bismark_dir='/gscratch/srlab/programs/Bismark-0.21.0'
bowtie2_dir='/gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/'
samtools='/gscratch/srlab/programs/samtools-1.9/samtools'
reads_dir='/gscratch/srlab/strigg/data/Pgenr/FASTQS/'
genome_folder='/gscratch/srlab/sr320/data/geoduck/v01/'

# Lab-wide environment setup (contents are defined in that file).
. /gscratch/srlab/programs/scripts/paths.sh

# ---- Genome preparation ----
# Run Bismark's genome preparation on genome_folder, using the aligner
# installed in bowtie2_dir.
"${bismark_dir}/bismark_genome_preparation" \
  --verbose \
  --parallel 28 \
  --path_to_aligner "${bowtie2_dir}" \
  "${genome_folder}"


# Align every paired-end sample found in reads_dir with Bismark.
# The sample ID is the R1 file name with "_R1_001_val_1.fq.gz" stripped;
# xargs substitutes it for {} to build the matching R1/R2 mate paths.
# reads_dir and genome_folder (both end in "/") are reused here so the inputs
# cannot drift away from the locations declared at the top of the script.
# Long options use the documented double-dash spelling, and the command ends
# without a trailing line continuation so it cannot swallow the next line.
find "${reads_dir}"*_R1_001_val_1.fq.gz \
  | xargs basename -s _R1_001_val_1.fq.gz \
  | xargs -I{} "${bismark_dir}/bismark" \
      --path_to_bowtie "${bowtie2_dir}" \
      --genome "${genome_folder}" \
      -p 4 \
      --score_min L,0,-0.6 \
      -1 "${reads_dir}{}_R1_001_val_1.fq.gz" \
      -2 "${reads_dir}{}_R2_001_val_2.fq.gz"




# Deduplicate each BAM in the working directory (paired-end mode, BAM output).
# basename strips ".bam" and the template re-appends it, so {} is the file stem.
find *.bam \
  | xargs basename -s .bam \
  | xargs -I{} "${bismark_dir}/deduplicate_bismark" --bam --paired {}.bam



# Run the methylation extractor once over all deduplicated BAMs in the
# working directory.
"${bismark_dir}/bismark_methylation_extractor" \
  --bedGraph \
  --counts \
  --scaffolds \
  --multicore 14 \
  --buffer_size 75% \
  *deduplicated.bam

# Bismark processing report (invoked without arguments, in the working directory)
"${bismark_dir}/bismark2report"

# Bismark summary report (invoked without arguments, in the working directory)
"${bismark_dir}/bismark2summary"



# Sort the deduplicated BAMs for methylKit and IGV.
# {} is the file stem; output is written as <stem>.sorted.bam with 28 threads.
find *deduplicated.bam \
  | xargs basename -s .bam \
  | xargs -I{} "${samtools}" sort --threads 28 {}.bam -o {}.sorted.bam

# Index the sorted BAMs for IGV.
# "-@ 28" is the number of CPU threads samtools may use.
find *.sorted.bam \
  | xargs basename -s .sorted.bam \
  | xargs -I{} "${samtools}" index -@ 28 {}.sorted.bam





# Run coverage2cytosine on every deduplicated coverage file in the working
# directory. The sample ID ({}) is the file name with the Bismark suffix
# stripped; it is used both as the output name (-o) and to rebuild the input
# file name.
find *deduplicated.bismark.cov.gz \
  | xargs basename -s _R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz \
  | xargs -I{} "${bismark_dir}/coverage2cytosine" \
      --genome_folder "${genome_folder}" \
      -o {} \
      --merge_CpG \
      --zero_based \
      {}_R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz


#bismark, #sbatch

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## Job Name
#SBATCH --job-name=el_01
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=3-12:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/sr320/1117c/


# Eleni 20191107
# The purpose of this script is to align fastq files to a genome and output the alignments as BAM files, keeping only reads whose mapping quality is at least 30.
# to run this script, place in same folder as the files you want to move and write ./bowtie2_cluster.sh in terminal 

# Lab-wide environment setup (contents are defined in that file); bowtie2 is
# called by bare name below, so it is presumably put on PATH here.
. /gscratch/srlab/programs/scripts/paths.sh

# Align each unpaired FASTQ in the eleni directory against the GCA_900700415
# index. {} is the FASTQ file stem; one SAM per input is written to 1117c.
find /gscratch/scrubbed/sr320/eleni/*.fq \
  | xargs basename -s .fq \
  | xargs -I{} bowtie2 \
      -x /gscratch/scrubbed/sr320/eleni/GCA_900700415 \
      -U /gscratch/scrubbed/sr320/eleni/{}.fq \
      -p 28 \
      -S /gscratch/scrubbed/sr320/1117c/{}.sam



# Convert every SAM produced by the alignment step to BAM, keeping only
# alignments with mapping quality >= 30 (-q 30).
# The SAM files are listed from 1117c, the same directory the alignment step
# writes them to and that samtools reads them from below.
find /gscratch/scrubbed/sr320/1117c/*.sam \
  | xargs basename -s .sam \
  | xargs -I{} /gscratch/srlab/programs/samtools-1.9/samtools view \
      -b -q 30 \
      /gscratch/scrubbed/sr320/1117c/{}.sam \
      -o /gscratch/scrubbed/sr320/1117c/{}.bam


#for file in $files
#do
    #echo ${file} # print the filename to terminal screen
    #bowtie2 -q -x GCA_900700415 -U ${file}.fq|samtools view -b -q 30 > ${file}.bam #conduct the alignment and output the file
#done




#Explanation of terms:
#bowtie2 -q -x <bt2-idx> -U <r> -S <sam>
#-q query input files are in fastq format
#-x <bt2-idx> Indexed "reference genome" filename prefix (minus trailing .X.bt2).
#-U <r> Files with unpaired reads.

# The default of bowtie2, is to write the output of the alignment to the terminal. 
# Also, bowtie does not write BAM files directly, but SAM output can be converted to BAM on the fly by piping bowtie's output to samtools view. 
# samtools options
#  -b       output BAM
# -q <integer> : discards reads whose mapping quality is below this number


#for file in $files
#do
    #echo ${file} # print the filename to terminal screen
    #bowtie2 -q -x GCA_900700415 -U ${file}.fq|samtools view -b -q 30 > ${file}.bam #conduct the alignment and output the file
#done


#q, #u, #x, #bowtie2, #conduct, #do, #done, #echo, #explanation, #for, #sbatch

bowtie

#!/bin/bash
## Job Name
#SBATCH --job-name=el_01
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=3-12:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/sr320/1117c/


# Eleni 20191107
# The purpose of this script is to align fastq files to a genome and output the alignments as BAM files, keeping only reads whose mapping quality is at least 30.
# to run this script, place in same folder as the files you want to move and write ./bowtie2_cluster.sh in terminal 

# Lab-wide environment setup (contents are defined in that file); bowtie2 is
# called by bare name below, so it is presumably put on PATH here.
. /gscratch/srlab/programs/scripts/paths.sh

# Align each unpaired FASTQ in the eleni directory against the GCA_900700415
# index. {} is the FASTQ file stem; one SAM per input is written to 1117c.
find /gscratch/scrubbed/sr320/eleni/*.fq \
  | xargs basename -s .fq \
  | xargs -I{} bowtie2 \
      -x /gscratch/scrubbed/sr320/eleni/GCA_900700415 \
      -U /gscratch/scrubbed/sr320/eleni/{}.fq \
      -p 28 \
      -S /gscratch/scrubbed/sr320/1117c/{}.sam



# Convert every SAM produced by the alignment step to BAM, keeping only
# alignments with mapping quality >= 30 (-q 30).
# The SAM files are listed from 1117c, the same directory the alignment step
# writes them to and that samtools reads them from below.
find /gscratch/scrubbed/sr320/1117c/*.sam \
  | xargs basename -s .sam \
  | xargs -I{} /gscratch/srlab/programs/samtools-1.9/samtools view \
      -b -q 30 \
      /gscratch/scrubbed/sr320/1117c/{}.sam \
      -o /gscratch/scrubbed/sr320/1117c/{}.bam


#for file in $files
#do
    #echo ${file} # print the filename to terminal screen
    #bowtie2 -q -x GCA_900700415 -U ${file}.fq|samtools view -b -q 30 > ${file}.bam #conduct the alignment and output the file
#done




#Explanation of terms:
#bowtie2 -q -x <bt2-idx> -U <r> -S <sam>
#-q query input files are in fastq format
#-x <bt2-idx> Indexed "reference genome" filename prefix (minus trailing .X.bt2).
#-U <r> Files with unpaired reads.

# The default of bowtie2, is to write the output of the alignment to the terminal. 
# Also, bowtie does not write BAM files directly, but SAM output can be converted to BAM on the fly by piping bowtie's output to samtools view. 
# samtools options
#  -b       output BAM
# -q <integer> : discards reads whose mapping quality is below this number


#for file in $files
#do
    #echo ${file} # print the filename to terminal screen
    #bowtie2 -q -x GCA_900700415 -U ${file}.fq|samtools view -b -q 30 > ${file}.bam #conduct the alignment and output the file
#done


#q, #u, #x, #bowtie2, #conduct, #do, #done, #echo, #explanation, #for, #sbatch

[code] #!/bin/bash ## Job Name...

#!/bin/bash
## Job Name
#SBATCH --job-name=c2c_l2
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=00-12:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/sr320/1104c/



# ---- Tool locations ----
# Only Bismark is needed by this job; the other tools stay commented out.
bismark_dir='/gscratch/srlab/programs/Bismark-0.21.0'
#bowtie2_dir="/gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/"
#samtools="/gscratch/srlab/programs/samtools-1.9/samtools"

# Lab-wide environment setup (contents are defined in that file).
. /gscratch/srlab/programs/scripts/paths.sh

# Run coverage2cytosine on every deduplicated coverage file in cov_files.
# {} is the sample ID (file name minus the Bismark suffix); the output name
# passed to -o is the sample ID followed by "_".
find /gscratch/srlab/sr320/data/geoduck/cov_files/*_R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz \
  | xargs basename -s _R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz \
  | xargs -I{} "${bismark_dir}/coverage2cytosine" \
      --genome_folder /gscratch/srlab/sr320/data/geoduck/v074 \
      -o {}_ \
      --merge_CpG \
      /gscratch/srlab/sr320/data/geoduck/cov_files/{}_R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz

#bowtie2_dir, #samtools, #sbatch

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## Job Name
#SBATCH --job-name=nl-02
## Allocation Definition
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=20-00:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/sr320/1009b/


# Load the dDocent environment module.
module load ddocent.module


#source /gscratch/srlab/programs/scripts/paths.sh

# dDocent is given the config file by relative name, so it must run from the
# directory that holds config_cragig.txt. Abort if that directory cannot be
# entered instead of silently running in the SLURM working directory.
cd /gscratch/scrubbed/sr320/cragig_wd/ || {
  printf 'error: cannot cd to /gscratch/scrubbed/sr320/cragig_wd/\n' >&2
  exit 1
}

# Run dDocent with the settings in config_cragig.txt.
/gscratch/srlab/programs/dDocent-2.7.6/dDocent \
  config_cragig.txt

#sbatch, #source

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## Job Name
#SBATCH --job-name=bm
## Allocation Definition
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=06-00:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/sr320/0923/



# Directories and programs
# bowtie2_dir and reads_dir end in "/"; reads_dir is used as a prefix that
# file names are appended to directly.
bismark_dir="/gscratch/srlab/programs/Bismark-0.21.0"
bowtie2_dir="/gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/"
samtools="/gscratch/srlab/programs/samtools-1.9/samtools"
reads_dir="/gscratch/srlab/sr320/data/caligus/"

# Lab-wide environment setup (contents are defined in that file).
source /gscratch/srlab/programs/scripts/paths.sh

# Align every paired-end sample found in reads_dir with Bismark.
# The sample ID is the R1 file name with "_L001_R1_001_val_1_val_1.fq.gz"
# stripped; xargs substitutes it for {} to build the R1/R2 mate paths.
# reads_dir is reused for the mate paths so they cannot drift away from the
# directory that find scans. Long options use the documented double-dash
# spelling, and the command ends without a trailing line continuation.
# NOTE(review): the reads come from a "caligus" directory but the genome path
# is geoduck/v074 — confirm this is the intended reference.
find "${reads_dir}"*_L001_R1_001_val_1_val_1.fq.gz \
  | xargs basename -s _L001_R1_001_val_1_val_1.fq.gz \
  | xargs -I{} "${bismark_dir}/bismark" \
      --path_to_bowtie "${bowtie2_dir}" \
      --genome /gscratch/srlab/sr320/data/geoduck/v074 \
      -p 4 \
      --score_min L,0,-0.6 \
      -1 "${reads_dir}{}_L001_R1_001_val_1_val_1.fq.gz" \
      -2 "${reads_dir}{}_L001_R2_001_val_2_val_2.fq.gz"



# Deduplicate each BAM in the working directory (paired-end mode, BAM output).
# basename strips ".bam" and the template re-appends it, so {} is the file stem.
find *.bam \
  | xargs basename -s .bam \
  | xargs -I{} "${bismark_dir}/deduplicate_bismark" --bam --paired {}.bam



# Run the methylation extractor once over all deduplicated BAMs in the
# working directory.
"${bismark_dir}/bismark_methylation_extractor" \
  --bedGraph \
  --counts \
  --scaffolds \
  --multicore 14 \
  --buffer_size 75% \
  *deduplicated.bam

# Bismark processing report (invoked without arguments, in the working directory)
"${bismark_dir}/bismark2report"

# Bismark summary report (invoked without arguments, in the working directory)
"${bismark_dir}/bismark2summary"



# Sort the deduplicated BAMs for methylKit and IGV.
# {} is the file stem; output is written as <stem>.sorted.bam with 28 threads.
find *deduplicated.bam \
  | xargs basename -s .bam \
  | xargs -I{} "${samtools}" sort --threads 28 {}.bam -o {}.sorted.bam

# Index the sorted BAMs for IGV.
# "-@ 28" is the number of CPU threads samtools may use.
find *.sorted.bam \
  | xargs basename -s .sorted.bam \
  | xargs -I{} "${samtools}" index -@ 28 {}.sorted.bam

#bismark, #sbatch

[code]cat 20190814_BmrkCalig.sh #!/bin/bash ## Job...

cat 20190814_BmrkCalig.sh
#!/bin/bash
## Job Name
#SBATCH --job-name=BismarkAlign_Calig
## Allocation Definition 
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes 
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=14-23:30:00
## Memory per node
#SBATCH --mem=100G
##turn on e-mail notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=strigg@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/strigg/analyses/20190814_Calig
#SBATCH --constraint="skylake|broadwell"

# Align with Bismark.
# Every Sealice* R1 file in TRIM_cat defines one sample: basename strips the
# "_R1_001_val_1.fq.gz" suffix and xargs substitutes the remaining sample ID
# for {} to build the R1/R2 mate paths. Output goes to the analysis directory.
# (No notebook cell magic here: this file is executed by bash under SLURM.)
find /gscratch/scrubbed/strigg/TRIMG_adapt_5bp/TRIM_cat/Sealice*R1_001_val_1.fq.gz \
  | xargs basename -s _R1_001_val_1.fq.gz \
  | xargs -I{} /gscratch/srlab/programs/Bismark-0.19.0/bismark \
      --path_to_bowtie /gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64 \
      --samtools_path /gscratch/srlab/programs/samtools-1.9 \
      --score_min L,0,-0.6 \
      -p 4 \
      --non_directional \
      --dovetail \
      --genome /gscratch/srlab/strigg/data/Caligus/GENOMES \
      -1 /gscratch/scrubbed/strigg/TRIMG_adapt_5bp/TRIM_cat/{}_R1_001_val_1.fq.gz \
      -2 /gscratch/scrubbed/strigg/TRIMG_adapt_5bp/TRIM_cat/{}_R2_001_val_2.fq.gz \
      -o /gscratch/scrubbed/strigg/analyses/20190814_Calig

# Run deduplication on every BAM in the analysis directory (-p: paired-end).
# stderr is captured in dedup.err.
# (No notebook cell magic here: this file is executed by bash under SLURM.)
/gscratch/srlab/programs/Bismark-0.19.0/deduplicate_bismark \
  --bam -p \
  /gscratch/scrubbed/strigg/analyses/20190814_Calig/*.bam \
  -o /gscratch/scrubbed/strigg/analyses/20190814_Calig/ \
  --samtools_path /gscratch/srlab/programs/samtools-1.9/ \
  2> /gscratch/scrubbed/strigg/analyses/20190814_Calig/dedup.err


# Create a combined mapping / deduplication summary.
#   stage 1: concatenate all paired-end alignment reports
#   stage 2: keep their headline lines
#   stage 3: append the deduplication reports to that stream ("-" = stdin)
#   stage 4: keep only the mapping-efficiency and "removed" lines
# Each stage must be joined by a pipe; without it, "cat", "-" and the report
# paths would be taken as file operands of the preceding grep.
cat /gscratch/scrubbed/strigg/analyses/20190814_Calig/*PE_report.txt \
  | grep -E 'Mapping efficiency:|paired-end|Sequence|C methylated' \
  | cat - /gscratch/scrubbed/strigg/analyses/20190814_Calig/*.deduplication_report.txt \
  | grep -E 'Mapping efficiency:|removed' \
  > /gscratch/scrubbed/strigg/analyses/20190814_Calig/mapping_dedup_summary.txt

# Run the methylation extractor over all deduplicated BAMs in the analysis
# directory; results go to the same directory and stderr is kept in bme.err.
/gscratch/srlab/programs/Bismark-0.19.0/bismark_methylation_extractor \
  --paired-end \
  --bedGraph \
  --counts \
  --scaffolds \
  --multicore 28 \
  /gscratch/scrubbed/strigg/analyses/20190814_Calig/*deduplicated.bam \
  -o /gscratch/scrubbed/strigg/analyses/20190814_Calig/ \
  --samtools /gscratch/srlab/programs/samtools-1.9/samtools \
  2> /gscratch/scrubbed/strigg/analyses/20190814_Calig/bme.err

# Create Bismark reports for individual samples.
/gscratch/srlab/programs/Bismark-0.19.0/bismark2report

# Create the Bismark summary report for all samples.
/gscratch/srlab/programs/Bismark-0.19.0/bismark2summary

# Run coverage2cytosine to generate cytosine coverage files.
# {} is the sample ID: the coverage file name with the suffix
# "_R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz" stripped. The
# input path below re-appends exactly that suffix (single underscore before
# "bismark") so it resolves to the file that find listed.
find /gscratch/scrubbed/strigg/analyses/20190814_Calig/*.cov.gz \
  | xargs basename -s _R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz \
  | xargs -I{} /gscratch/srlab/programs/Bismark-0.19.0/coverage2cytosine --gzip \
      --genome_folder /gscratch/srlab/strigg/data/Caligus/GENOMES \
      -o /gscratch/scrubbed/strigg/analyses/20190814_Calig/{}_cytosine_CpG_cov_report \
      /gscratch/scrubbed/strigg/analyses/20190814_Calig/{}_R1_001_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz

# Sort the deduplicated BAMs for methylKit.
# {} is the sample ID (file name minus the Bismark suffix); each sorted file
# is written to the analysis directory as <sample>.dedup.sorted.bam.
find /gscratch/scrubbed/strigg/analyses/20190814_Calig/*deduplicated.bam \
  | xargs basename -s _R1_001_val_1_bismark_bt2_pe.deduplicated.bam \
  | xargs -I{} /gscratch/srlab/programs/samtools-1.9/samtools sort \
      /gscratch/scrubbed/strigg/analyses/20190814_Calig/{}_R1_001_val_1_bismark_bt2_pe.deduplicated.bam \
      -o /gscratch/scrubbed/strigg/analyses/20190814_Calig/{}.dedup.sorted.bam



#align, #compile, #create, #run, #sbatch