[code]#!/bin/bash ## Job Name -...

#!/bin/bash
## Job Name - can be changed
#SBATCH --job-name=bs-geo
## Allocation Definition - confirm correctness
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (often you will only use 1)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=30-00:00:00
## Memory per node
#SBATCH --mem=100G
## email notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir= /gscratch/scrubbed/sr320/0719/
# Exit script if a command fails
# set -e

##########################
# This is a script written to assess bisulfite sequencing reads
# using Bismark. The user needs to supply the following:
# 1. A single directory location contaning BSseq reads.
# 2. BSseq reads need to be gzipped FastQ and end with .fq.gz
# 3. A bisulfite-converted genome, produced with Bowtie2.
# 4. Indicate if deduplication should be performed (whole genome sbator reduced genome sequencing)
#
# Set these values below



### USER NEEDS TO SET VARIABLES FOR THE FOLLOWING:
# Set --workdir= path in SBATCH header above.
#
# Full path to directory with sequencing reads
reads_dir="/gscratch/srlab/strigg/data/Pgenr/FASTQS"

# Full path to bisulftie-converted genome directory
genome_dir="/gscratch/srlab/sr320/data/geoduck/v074"

# Enter y (for yes) or n (for no) between the quotes.
# Yes - Whole genome bisulfite sequencing, MBD.
# No - Reduced genome bisulfite sequencing (e.g. RRBS)
deduplicate="No"

# Run Bismark on desired number of reads/pairs subset
# The default value is 0, which will run Bismark on all reads/pairs
subset="-u 0"

####################################################
# DO NOT EDIT BELOW THIS LINE
####################################################




# Evaluate user-edited variables to make sure they have been filled
[ -z ${deduplicate} ] \
&& { echo "The deduplicate variable is not defined. Please edit the SBATCH script and add y or n to deduplicate variable."; exit 1; }

[ -z ${genome_dir} ] \
&& { echo "The bisulfite genome directory path has not been set. Please edit the SBATCH script."; exit 1; }

[ -z ${reads_dir} ] \
&& { echo "The reads directory path has not been set. Please edit the SBATCH script."; exit 1; }



# Directories and programs
wd=$(pwd)
bismark_dir="/gscratch/srlab/programs/Bismark-0.21.0_dev"
bowtie2_dir="/gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/"
samtools="/gscratch/srlab/programs/samtools-1.9/samtools"
threads="28"
reads_list="input_fastqs.txt"

## Concatenated FastQ Files
R1=""
R2=""

# Initialize arrays
R1_array=()
R2_array=()

# Create list of input FastQ files for easier confirmation.
for fastq in ${reads_dir}/*.fq.gz
do
  echo ${fastq##*/} >> ${reads_list}
done

# Check for paired-end
# Capture grep output
# >0 means single-end reads
# set +e/set -e prevents error >0 from exiting script
set +e
grep "_R2_" ${reads_list}
paired=$?
set -e

# Confirm even number of FastQ files
num_files=$(wc -l < ${reads_list})
fastq_even_odd=$(echo $(( ${num_files} % 2 )) )


## Save FastQ files to arrays
R1_array=(${reads_dir}/*_R1_*.fq.gz)
## Send comma-delimited list of R1 FastQ to variable
R1=$(echo ${R1_array[@]} | tr " " ",")

# Evaluate if paired-end FastQs
# Run Bismark as paired-end/single-end based on evaluation
if [[ ${paired} -eq 0 ]]; then
  # Evaluate if FastQs have corresponding partner (i.e. R1 and R2 files)
  # Evaluated on even/odd number of files.
  if [[ ${fastq_even_odd} -ne 0 ]]; then
    { echo "Missing at least one FastQ pair from paired-end FastQ set."; \
      echo "Please verify input FastQs all have an R1 and corresponding R2 file.";
      exit 1; \
    }
  fi
  ## Save FastQ files to arrays
  R2_array=(${reads_dir}/*_R2_*.fq.gz)
  ## Send comma-delimited list of R2 FastQ to variable
  R2=$(echo ${R2_array[@]} | tr " " ",")
  # Run bismark using bisulftie-converted genome
  # Generates a set of BAM files as outputs
  # Records stderr to a file for easy viewing of Bismark summary info
  ${bismark_dir}/bismark \
  --path_to_bowtie2 ${bowtie2_dir} \
  --genome ${genome_dir} \
  --samtools_path=${samtools} \
  --non_directional \
  --score_min L,0,-0.6 \
  ${subset} \
  -p ${threads} \
  -1 ${R1} \
  -2 ${R2} \
  2> bismark_summary.txt
else
  # Run Bismark single-end
  ${bismark_dir}/bismark \
  --path_to_bowtie2 ${bowtie2_dir} \
  --genome ${genome_dir} \
  --samtools_path=${samtools} \
  --non_directional \
  ${subset} \
  -p ${threads} \
  ${R1} \
  2> bismark_summary.txt
fi



# Determine if deduplication is necessary
# Then, determine if paired-end or single-end
if [ ${deduplicate} == "y"  ]; then
  # Sort Bismark BAM files by read names instead of chromosomes
  find *.bam \
  | xargs basename -s .bam \
  | xargs -I bam_basename \
  ${samtools} sort \
  --threads ${threads} \
  -n bam_basename.bam \
  -o bam_basename.sorted.bam
  if [ ${paired} -eq 0 ]; then
    # Deduplication
    find *sorted.bam \
    | xargs basename -s .bam \
    | xargs -I bam_basename \
    ${bismark_dir}/deduplicate_bismark \
    --paired \
    --samtools_path=${samtools} \
    bam_basename.bam
  else
    find *sorted.bam \
    | xargs basename -s .bam \
    | xargs -I bam_basename \
    ${bismark_dir}/deduplicate_bismark \
    --single \
    --samtools_path=${samtools} \
    bam_basename.bam
  fi
  # Methylation extraction
  # Extracts methylation info from deduplicated BAM files produced by Bismark
  # Options to created a bedgraph file, a cytosine coverage report, counts, remove spaces from names
  # and to use the "scaffolds" setting.
  ${bismark_dir}/bismark_methylation_extractor \
  --bedGraph \
  --cytosine_report \
  --genome_folder ${genome_dir} \
  --gzip
  --counts \
  --scaffolds \
  --remove_spaces \
  --multicore ${threads} \
  --buffer_size 75% \
  --samtools_path=${samtools} \
  *deduplicated.bam
  # Sort deduplicated BAM files
  find *deduplicated.bam \
  | xargs basename -s .bam \
  | xargs -I bam_basename \
  ${samtools} sort \
  --threads ${threads} \
  bam_basename.bam \
  -o bam_basename.sorted.bam
  # Index sorted files for IGV
  # The "-@ ${threads}" below specifies number of CPU threads to use.
  find *deduplicated.sorted.bam \
  | xargs -I sorted_bam \
  ${samtools} index \
  -@ ${threads} \
  sorted_bam
else
  # Methylation extraction
  # Extracts methylation info from BAM files produced by Bismark
  # Options to created a bedgraph file, a cytosine coverage report, counts, remove spaces from names
  # and to use the "scaffolds" setting.
  ${bismark_dir}/bismark_methylation_extractor \
  --bedGraph \
  --cytosine_report \
  --genome_folder ${genome_dir} \
  --gzip \
  --counts \
  --scaffolds \
  --remove_spaces \
  --multicore ${threads} \
  --buffer_size 75% \
  --samtools_path=${samtools} \
  *.bam

  # Sort BAM files
  find *.bam \
  | xargs basename -s .bam \
  | xargs -I bam_basename \
  ${samtools} sort \
  --threads ${threads} \
  -o bam_basename.sorted.bam
  # Index sorted files for IGV
  # The "-@ ${threads}" below specifies number of CPU threads to use.
  find *sorted.bam \
  | xargs -I sorted_bam \
  ${samtools} index \
  -@ ${threads} \
  sorted_bam
fi


# Bismark processing report
# Generates HTML reports from previously created files
${bismark_dir}/bismark2report

#Bismark summary report
# Generates HTML summary reports from previously created files
${bismark_dir}/bismark2summary

#bismark, #sbatch

[code][sr320@mox2 2019]$ cat 0626_1311.sh #!/bin/bash...

[sr320@mox2 2019]$ cat 0626_1311.sh
#!/bin/bash
## Job Name - can be changed
#SBATCH --job-name=bs-dedup-ch
## Allocation Definition - confirm correctness
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (often you will only use 1)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=30-00:00:00
## Memory per node
#SBATCH --mem=100G
## email notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir= /gscratch/scrubbed/sr320/0626_1311/
# Exit script if a command fails
# set -e

##########################
# This is a script written to assess bisulfite sequencing reads
# using Bismark. The user needs to supply the following:
# 1. A single directory location contaning BSseq reads.
# 2. BSseq reads need to be gzipped FastQ and end with .fq.gz
# 3. A bisulfite-converted genome, produced with Bowtie2.
# 4. Indicate if deduplication should be performed (whole genome or reduced genome sequencing)
#
# Set these values below



### USER NEEDS TO SET VARIABLES FOR THE FOLLOWING:
# Set --workdir= path in SBATCH header above.
#
# Full path to directory with sequencing reads
reads_dir="/gscratch/srlab/strigg/data/Pgenr/FASTQS"

# Full path to bisulftie-converted genome directory
genome_dir="/gscratch/srlab/sr320/data/geoduck/v074"

# Enter y (for yes) or n (for no) between the quotes.
# Yes - Whole genome bisulfite sequencing, MBD.
# No - Reduced genome bisulfite sequencing (e.g. RRBS)
deduplicate="YES"

# Run Bismark on desired number of reads/pairs subset
# The default value is 0, which will run Bismark on all reads/pairs
subset="-u 100000"

####################################################
# DO NOT EDIT BELOW THIS LINE
####################################################




# Evaluate user-edited variables to make sure they have been filled
[ -z ${deduplicate} ] \
&& { echo "The deduplicate variable is not defined. Please edit the SBATCH script and add y or n to deduplicate variable."; exit 1; }

[ -z ${genome_dir} ] \
&& { echo "The bisulfite genome directory path has not been set. Please edit the SBATCH script."; exit 1; }

[ -z ${reads_dir} ] \
&& { echo "The reads directory path has not been set. Please edit the SBATCH script."; exit 1; }



# Directories and programs
wd=$(pwd)
bismark_dir="/gscratch/srlab/programs/Bismark-0.21.0_dev"
bowtie2_dir="/gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/"
samtools="/gscratch/srlab/programs/samtools-1.9/samtools"
threads="28"
reads_list="input_fastqs.txt"

## Concatenated FastQ Files
R1=""
R2=""

# Initialize arrays
R1_array=()
R2_array=()

# Create list of input FastQ files for easier confirmation.
for fastq in ${reads_dir}/*.fq.gz
do
  echo ${fastq##*/} >> ${reads_list}
done

# Check for paired-end
# Capture grep output
# >0 means single-end reads
# set +e/set -e prevents error >0 from exiting script
set +e
grep "_R2_" ${reads_list}
paired=$?
set -e

# Confirm even number of FastQ files
num_files=$(wc -l < ${reads_list})
fastq_even_odd=$(echo $(( ${num_files} % 2 )) )


## Save FastQ files to arrays
R1_array=(${reads_dir}/*_R1_*.fq.gz)
## Send comma-delimited list of R1 FastQ to variable
R1=$(echo ${R1_array[@]} | tr " " ",")

# Evaluate if paired-end FastQs
# Run Bismark as paired-end/single-end based on evaluation
if [[ ${paired} -eq 0 ]]; then
  # Evaluate if FastQs have corresponding partner (i.e. R1 and R2 files)
  # Evaluated on even/odd number of files.
  if [[ ${fastq_even_odd} -ne 0 ]]; then
    { echo "Missing at least one FastQ pair from paired-end FastQ set."; \
      echo "Please verify input FastQs all have an R1 and corresponding R2 file.";
      exit 1; \
    }
  fi
  ## Save FastQ files to arrays
  R2_array=(${reads_dir}/*_R2_*.fq.gz)
  ## Send comma-delimited list of R2 FastQ to variable
  R2=$(echo ${R2_array[@]} | tr " " ",")
  # Run bismark using bisulftie-converted genome
  # Generates a set of BAM files as outputs
  # Records stderr to a file for easy viewing of Bismark summary info
  ${bismark_dir}/bismark \
  --path_to_bowtie2 ${bowtie2_dir} \
  --genome ${genome_dir} \
  --samtools_path=${samtools} \
  --non_directional \
  --score_min L,0,-0.6 \
  ${subset} \
  -p ${threads} \
  -1 ${R1} \
  -2 ${R2} \
  2> bismark_summary.txt
else
  # Run Bismark single-end
  ${bismark_dir}/bismark \
  --path_to_bowtie2 ${bowtie2_dir} \
  --genome ${genome_dir} \
  --samtools_path=${samtools} \
  --non_directional \
  ${subset} \
  -p ${threads} \
  ${R1} \
  2> bismark_summary.txt
fi



# Determine if deduplication is necessary
# Then, determine if paired-end or single-end
if [ ${deduplicate} == "y"  ]; then
  # Sort Bismark BAM files by read names instead of chromosomes
  find *.bam \
  | xargs basename -s .bam \
  | xargs -I bam_basename \
  ${samtools} sort \
  --threads ${threads} \
  -n bam_basename.bam \
  -o bam_basename.sorted.bam
  if [ ${paired} -eq 0 ]; then
    # Deduplication
    find *sorted.bam \
    | xargs basename -s .bam \
    | xargs -I bam_basename \
    ${bismark_dir}/deduplicate_bismark \
    --paired \
    --samtools_path=${samtools} \
    bam_basename.bam
  else
    find *sorted.bam \
    | xargs basename -s .bam \
    | xargs -I bam_basename \
    ${bismark_dir}/deduplicate_bismark \
    --single \
    --samtools_path=${samtools} \
    bam_basename.bam
  fi
  # Methylation extraction
  # Extracts methylation info from deduplicated BAM files produced by Bismark
  # Options to created a bedgraph file, a cytosine coverage report, counts, remove spaces from names
  # and to use the "scaffolds" setting.
  ${bismark_dir}/bismark_methylation_extractor \
  --bedGraph \
  --cytosine_report \
  --genome_folder ${genome_dir} \
  --gzip
  --counts \
  --scaffolds \
  --remove_spaces \
  --multicore ${threads} \
  --buffer_size 75% \
  --samtools_path=${samtools} \
  *deduplicated.bam
  # Sort deduplicated BAM files
  find *deduplicated.bam \
  | xargs basename -s .bam \
  | xargs -I bam_basename \
  ${samtools} sort \
  --threads ${threads} \
  bam_basename.bam \
  -o bam_basename.sorted.bam
  # Index sorted files for IGV
  # The "-@ ${threads}" below specifies number of CPU threads to use.
  find *deduplicated.sorted.bam \
  | xargs -I sorted_bam \
  ${samtools} index \
  -@ ${threads} \
  sorted_bam
else
  # Methylation extraction
  # Extracts methylation info from BAM files produced by Bismark
  # Options to created a bedgraph file, a cytosine coverage report, counts, remove spaces from names
  # and to use the "scaffolds" setting.
  ${bismark_dir}/bismark_methylation_extractor \
  --bedGraph \
  --cytosine_report \
  --genome_folder ${genome_dir} \
  --gzip \
  --counts \
  --scaffolds \
  --remove_spaces \
  --multicore ${threads} \
  --buffer_size 75% \
  --samtools_path=${samtools} \
  *.bam

  # Sort BAM files
  find *.bam \
  | xargs basename -s .bam \
  | xargs -I bam_basename \
  ${samtools} sort \
  --threads ${threads} \
  -o bam_basename.sorted.bam
  # Index sorted files for IGV
  # The "-@ ${threads}" below specifies number of CPU threads to use.
  find *sorted.bam \
  | xargs -I sorted_bam \
  ${samtools} index \
  -@ ${threads} \
  sorted_bam
fi


# Bismark processing report
# Generates HTML reports from previously created files
${bismark_dir}/bismark2report

#Bismark summary report
# Generates HTML summary reports from previously created files
${bismark_dir}/bismark2summary[sr320@mox2 2019]$ 

#bismark, #sbatch

[code] #!/bin/bash ## Job Name...

#!/bin/bash
## Job Name
#SBATCH --job-name=oly-mbd
## Allocation Definition
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=00-100:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir=/gscratch/srlab/sr320/analyses/2019/0327



# Directories and programs
wd=$(pwd)
bismark_dir="/gscratch/srlab/programs/Bismark-0.21.0"
bowtie2_dir="/gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/"
samtools="/gscratch/srlab/programs/samtools-1.9/samtools"
reads_dir="/gscratch/srlab/sr320/data/olurida-bs/"



source /gscratch/srlab/programs/scripts/paths.sh


find ${reads_dir}*_s456_trimmed.fq.gz \
| xargs basename -s _s456_trimmed.fq.gz | xargs -I{} ${bismark_dir}/bismark \
--path_to_bowtie ${bowtie2_dir} \
-genome /gscratch/srlab/sr320/data/olurida-genomes/v081 \
-p 14 \
--non_directional \
${reads_dir}/{}_s456_trimmed.fq.gz 




${bismark_dir}/deduplicate_bismark \
--bam -p \
*.bam


${bismark_dir}/bismark_methylation_extractor \
--bedGraph --counts --scaffolds \
--multicore 14 \
*deduplicated.bam



# Bismark processing report

${bismark_dir}/bismark2report

#Bismark summary report

${bismark_dir}/bismark2summary



# Sort files for methylkit and IGV

find *deduplicated.bam | \
xargs basename -s .bam | \
xargs -I{} ${samtools} \
sort --threads 28 {}.bam \
-o {}.sorted.bam

# Index sorted files for IGV
# The "-@ 16" below specifies number of CPU threads to use.

find *.sorted.bam | \
xargs basename -s .sorted.bam | \
xargs -I{} ${samtools} \
index -@ 28 {}.sorted.bam

#bismark, #sbatch

[code] [sr320@mox2 jobs]$ cat 0212_1230.sh...

[sr320@mox2 jobs]$ cat 0212_1230.sh 
#!/bin/bash
## Job Name
#SBATCH --job-name=geoy-blast
## Allocation Definition 
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1   
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=3-00:00:00
## Memory per node
#SBATCH --mem=100
##turn on e-mail notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir=/gscratch/srlab/sr320/analyses/0212

/gscratch/srlab/programs/ncbi-blast-2.6.0+/bin/blastn \
-task blastn \
-query /gscratch/srlab/sr320/data/geoduck/Pgenerosa_transcriptome_v5.fasta
-db  /gscratch/srlab/sr320/blastdb/Pgenerosa_v071 \
-out /gscratch/srlab/sr320/analyses/1119/geo-tran-v071.tab \
-evalue 1e-20 \
-max_target_seqs 1 \
-outfmt 6 \
-num_threads 28

#sbatch

Mapping quant-seq to genome

[sr320@mox2 jobs]$ cat 1117_1500.sh 
#!/bin/bash
## Job Name
#SBATCH --job-name=ls-bow
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=5-100:00:00
## Memory per node
#SBATCH --mem=500G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
## Specify the working directory for this job
#SBATCH --workdir=/gscratch/srlab/sr320/analyses/1117/

source /gscratch/srlab/programs/scripts/paths.sh





find /gscratch/srlab/sr320/data/ls-tag/*.gz | xargs basename -s L006_R1_001.fastq.gz | xargs -I{} bowtie2 \
-x /gscratch/srlab/sr320/data/oly/Olurida_v081.bowtie-index \
-U /gscratch/srlab/sr320/data/ls-tag/{}L006_R1_001.fastq.gz \
-p 28 \
-q \
-S /gscratch/srlab/sr320/analyses/1117/{}_01_bowtie2.sam

#sbatch

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## Job Name
#SBATCH --job-name=ks-oly-blast
## Allocation Definition 
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1   
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=3-00:00:00
## Memory per node
#SBATCH --mem=100
##turn on e-mail notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir=/gscratch/srlab/sr320/analyses/1105

/gscratch/srlab/programs/ncbi-blast-2.6.0+/bin/blastn \
-task blastn \
-query /gscratch/srlab/sr320/data/oly/Trinity.fasta \
-db /gscratch/srlab/sr320/data/oly/Olurida_v081 \
-out /gscratch/srlab/sr320/analyses/1105/ks-trinity-v081.tab \
-evalue 1e-20 \
-outfmt 6 \
-num_threads 28

#sbatch

[code][sr320@mox1 jobs]$ cat 1029_1500.sh #!/bin/bash...

[sr320@mox1 jobs]$ cat 1029_1500.sh 
#!/bin/bash
## Job Name
#SBATCH --job-name=bow64
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=5-100:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir=/gscratch/srlab/sr320/analyses/1030

source /gscratch/srlab/programs/scripts/paths.sh


for fastq in /gscratch/scrubbed/sr320/Phred64_fqs/*1.fq.gz; do
  read1=$(echo "$fastq")
  read1_array+=("$read1")
done

for fastq in /gscratch/scrubbed/sr320/Phred64_fqs/*2.fq.gz; do
  read2=$(echo "$fastq")
  read2_array+=("$read2")
done

for pair in "${!read1_array[@]}"; do
  i=${read1_array[$pair]}
  j=${read2_array[$pair]}
  filename="${i##*/}"
  no_ext="${filename%%.*}"
  /gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/bowtie2 \
  -x /gscratch/srlab/sr320/data/new_genome_files/chinook_genome_masked \
  -1 "$i" \
  -2 "$j" \
  -X 2000 --sensitive  --no-mixed --phred64 --no-discordant --no-unal \
  -S /gscratch/scrubbed/sr320/cw/1017/"$no_ext"_bowtie2.sam \
  -q \
  -p 28
done

OUTPUT: /var/services/homes/charlie/out/1101

[sr320@mox1 jobs]$ cat 1029_1600.sh 
#!/bin/bash
## Job Name
#SBATCH --job-name=bow33
## Allocation Definition
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=5-100:00:00
## Memory per node
#SBATCH --mem=100G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
#SBATCH --workdir=/gscratch/srlab/sr320/analyses/1029

source /gscratch/srlab/programs/scripts/paths.sh


for fastq in /gscratch/scrubbed/sr320/Phred33_fqs/*1.fq.gz; do
  read1=$(echo "$fastq")
  read1_array+=("$read1")
done

for fastq in /gscratch/scrubbed/sr320/Phred33_fqs/*2.fq.gz; do
  read2=$(echo "$fastq")
  read2_array+=("$read2")
done

for pair in "${!read1_array[@]}"; do
  i=${read1_array[$pair]}
  j=${read2_array[$pair]}
  filename="${i##*/}"
  no_ext="${filename%%.*}"
  /gscratch/srlab/programs/bowtie2-2.3.4.1-linux-x86_64/bowtie2 \
  -x /gscratch/srlab/sr320/data/new_genome_files/chinook_genome_masked \
  -1 "$i" \
  -2 "$j" \
  -X 2000 --sensitive  --no-mixed --phred33 --no-discordant --no-unal \
  -S /gscratch/scrubbed/sr320/cw/"$no_ext"_bowtie2.sam \
  -q \
  -p 28
done

OUTPUT: /var/services/homes/charlie/out/1100

#sbatch