[code] -> Done reading data...

	-> Done reading data waiting for calculations to finish
	-> Done waiting for threads
	-> Output filenames:
		->"AllSamples_Maf1_association_genotypes.arg"
		->"AllSamples_Maf1_association_genotypes.mafs.gz"
		->"AllSamples_Maf1_association_genotypes.lrt0.gz"
		->"AllSamples_Maf1_association_genotypes.geno.gz"
	-> Sun Jun 17 10:22:12 2018
	-> Arguments and parameters for all analysis are located in .arg file
	-> Total number of sites analyzed: 786914862
	-> Number of sites retained after filetering: 0 
	[ALL done] cpu-time used =  194271.81 sec
	[ALL done] walltime used =  74941.00 sec

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## ANGSD association job: runs angsd with -doAsso 1 and -doGeno 2 on the BAM
## list, using the site list passed via -sites, and writes its results under
## the prefix AllSamples_Maf1_association_genotypes in the working directory.
##
## NOTE(review): the log recorded for this output prefix reports
## "Number of sites retained after filetering: 0". The -sites file and the
## filter thresholds passed below are the first things to re-check.
## Job Name
#SBATCH --job-name=angsd-3
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
## NOTE(review): the hours field is 100 -- confirm this is the intended limit.
#SBATCH --time=10-100:00:00
## Memory per node
#SBATCH --mem=70G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
## (--chdir is the current spelling of the older --workdir option, which
## newer Slurm releases no longer accept.)
#SBATCH --chdir=/gscratch/srlab/sr320/analyses/0616

# Lab-wide environment setup.
source /gscratch/srlab/programs/scripts/paths.sh

# Arguments are kept in an array so each group can carry its own comment;
# the order is the same as in the original single-command invocation.
angsd_args=(
  # Inputs and output prefix
  -bam /gscratch/srlab/sr320/data/cw/all_bam.bamlist
  -out AllSamples_Maf1_association_genotypes
  -sites /gscratch/srlab/sr320/data/cw/AllSamples_Maf1_sites

  # Association test and its trait file (presumably binary case/control)
  -doAsso 1
  -yBin /gscratch/srlab/sr320/data/cw/YBin_file

  # Likelihood model, allele frequencies, major/minor allele selection
  -GL 1
  -doMaf 1
  -doMajorMinor 3

  # Site / sample filters
  -minMaf 0.05
  -SNP_pval 1e-6
  -minInd 468
  -minQ 20

  # Presumably the number of threads -- confirm against the ANGSD docs
  -P 28

  # Genotype calling and the depth counts it relies on
  -doGeno 2
  -doPost 1
  -postCutoff 0.95
  -doCounts 1
  -geno_minDepth 10
)

/gscratch/srlab/sr320/programs/angsd/angsd "${angsd_args[@]}"

#sbatch

DML Analysis: How to get GOterms

Gene Set Enrichment Analysis Workflow:

  • Get Entrez Gene IDs
  • Match IDs with GOterms
  • Use both topGO and DAVID for enrichment

Problem:

  • The gene IDs in the C. virginica GFF files are not official NCBI Entrez Gene IDs. It is unclear what the LOC{} identifiers are; the XM_{} identifiers are GenBank IDs, and DAVID did not recognize the GenBank IDs taken from the GFF.

Solution:

  • blastx to get Uniprot accession codes and GOterms
  • Use Uniprot and GOterms in DAVID
  • Convert Uniprot accession codes to Entrez IDs
  • Use Entrez IDs and GOterms in DAVID

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## ANGSD allele-frequency job: runs angsd with -doMaf 1 on the BAM list against
## the chinook reference FASTA, with depth counting enabled, and writes its
## results under the prefix AllSamples_Maf1 in the working directory.
## Job Name
#SBATCH --job-name=angsd-maf
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
## NOTE(review): the hours field is 100 -- confirm this is the intended limit.
#SBATCH --time=10-100:00:00
## Memory per node
#SBATCH --mem=70G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
## (--chdir is the current spelling of the older --workdir option, which
## newer Slurm releases no longer accept.)
#SBATCH --chdir=/gscratch/srlab/sr320/analyses/0615b

# Lab-wide environment setup.
source /gscratch/srlab/programs/scripts/paths.sh

# Arguments are kept in an array so each group can carry its own comment;
# the order is the same as in the original single-command invocation.
angsd_args=(
  # Inputs and output prefix
  -b /gscratch/srlab/sr320/data/cw/all_bam.bamlist
  -ref /gscratch/srlab/sr320/data/cw/chinook_genome_v1.fasta
  -out AllSamples_Maf1

  # Likelihood model, allele frequencies, major/minor allele selection
  -GL 1
  -doMaf 1
  -doMajorMinor 1

  # Site / sample filters
  -minMaf 0.05
  -SNP_pval 1e-6
  -minInd 468
  -minQ 20

  # Presumably the number of threads -- confirm against the ANGSD docs
  -P 28

  # Depth limits and depth/count outputs
  -setMinDepth 468
  -setMaxDepth 10000
  -doCounts 1
  -doDepth 1
  -dumpCounts 1
)

/gscratch/srlab/sr320/programs/angsd/angsd "${angsd_args[@]}"

#sbatch

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## ANGSD allele-frequency job: runs angsd with -doMaf 2 on the BAM list against
## the chinook reference FASTA and writes its results under the prefix
## AllSamples_MinQ20 in the working directory.
## Job Name
#SBATCH --job-name=angsd
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
## NOTE(review): the hours field is 100 -- confirm this is the intended limit.
#SBATCH --time=10-100:00:00
## Memory per node
#SBATCH --mem=400G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
## (--chdir is the current spelling of the older --workdir option, which
## newer Slurm releases no longer accept.)
#SBATCH --chdir=/gscratch/srlab/sr320/analyses/0615

# Lab-wide environment setup.
source /gscratch/srlab/programs/scripts/paths.sh

# Arguments are kept in an array so each group can carry its own comment;
# the order is the same as in the original single-command invocation.
angsd_args=(
  # Inputs and output prefix
  -b /gscratch/srlab/sr320/data/cw/all_bam.bamlist
  -ref /gscratch/srlab/sr320/data/cw/chinook_genome_v1.fasta
  -out AllSamples_MinQ20

  # Likelihood model, allele frequencies, major/minor allele selection
  -GL 1
  -doMaf 2
  -doMajorMinor 1

  # Site / sample filters
  -minMaf 0.05
  -SNP_pval 1e-6
  -minInd 468
  -minQ 20

  # Presumably the number of threads -- confirm against the ANGSD docs
  -P 28
)

/gscratch/srlab/sr320/programs/angsd/angsd "${angsd_args[@]}"

#sbatch

[code][sr320@mox2 cw]$ scp sr320@eagle.fish.washington.edu:/var/services/web/Charlie_Waters/* .[/code]

[sr320@mox2 cw]$ scp sr320@eagle.fish.washington.edu:/var/services/web/Charlie_Waters/* .

[code]#!/bin/bash ## Job Name #SBATCH...

#!/bin/bash
## blastp job: searches the C. virginica protein FASTA against the
## uniprot_sprot_080917 BLAST database and writes tabular output (-outfmt 6)
## to Cv_sprot.blastout in the working directory.
## Job Name
#SBATCH --job-name=blastp
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Resources
## Nodes (We only get 1, so this is fixed)
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
## NOTE(review): the hours field is 100 -- confirm this is the intended limit.
#SBATCH --time=10-100:00:00
## Memory per node
#SBATCH --mem=70G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=sr320@uw.edu
## Specify the working directory for this job
## (--chdir is the current spelling of the older --workdir option, which
## newer Slurm releases no longer accept.)
#SBATCH --chdir=/gscratch/srlab/sr320/analyses/0614

# Lab-wide environment setup.
source /gscratch/srlab/programs/scripts/paths.sh

# Arguments are kept in an array so each group can carry its own comment;
# the order is the same as in the original single-command invocation.
blastp_args=(
  # Query proteins and target database
  -query /gscratch/srlab/sr320/query/GCF_002022765.2_C_virginica-3.0_protein.faa
  -db /gscratch/srlab/sr320/blastdb/uniprot_sprot_080917

  # Hit limits.
  # NOTE(review): -max_target_seqs 1 is reported not to guarantee the
  # best-scoring hit in BLAST+; confirm this is acceptable for annotation.
  -max_target_seqs 1
  -evalue 1E-20

  # Output format, threads, and output file
  -outfmt 6
  -num_threads 28
  -out Cv_sprot.blastout
)

/gscratch/srlab/programs/ncbi-blast-2.6.0+/bin/blastp "${blastp_args[@]}"

#sbatch