Skip to content

Commit c9b9ab7

Browse files
author
Emrah Akkoyun
committed
workflow is changed
1 parent 0ae73b1 commit c9b9ab7

22 files changed

+490
-60
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
workflow/.snakemake
2+
workflow/logs
3+
results

config/config.yml

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,36 @@
11
configfile: "config.yml"
22

33
#outputs
4-
workdir: "/cta/users/eakkoyun/WORKFOLDER/TEST/210221_test4/phylogeny-snakemake"
5-
query_fasta: ["P08922","P22304","P13569"]
4+
workdir: "/cta/users/eakkoyun/WORKFOLDER/TEST/010321_test/phylogeny-snakemake"
5+
query_ids: ["P08922","P22304","P13569"]
66

77
#blast
88
blastdb_folder: "resources/blastdb"
99
blastdb_file: "all_eu.fasta"
1010
outfmt: "0"
11-
max_target_seqs: "200"
11+
#max_target_seqs: "5000"
12+
max_target_seqs: "1000" #delete
13+
num_iterations: "2"
1214

1315
#parse blast
14-
blast_hit_number: "200"
16+
#blast_hit_number: "1000"
17+
blast_hit_number: "200" #delete
1518
max_e_value: "0.00001"
1619
min_identity: "30"
1720
max_identity: "95"
1821

1922
#alignment
20-
mafft_method: "-linsi" #if left empty it will be FFT-NS-2
23+
mafft_method: "-fftns" #if left empty it will be FFT-NS-2
2124

2225
#trim alignment
23-
trimal_method: "-gt 0.8"
26+
trimal_method: "-gappyout"
2427

2528
#tree:
26-
raxml_model: "LG4M"
29+
raxml_model: "LG4X"
2730
raxml_seed: "2"
28-
raxml_threads: "12"
29-
raxml_tree_number: "20"
31+
raxml_threads: "1"
32+
#raxml_tree_number: "20"
33+
raxml_tree_number: "4" #delete
3034

3135
#iqtree
3236
iqtree_seed: "1234"
@@ -52,5 +56,10 @@ cleandata: "0"
5256
method: "1"
5357

5458
#phylas
55-
weights: "0,1,2,3,X,CountNodes_1,CountNodes_2,CountNodes_3,CountNodes_4"
59+
weights: "0.1,0.5,0,1,2,3,5,mean,median,X,CountNodes_1,CountNodes_2,CountNodes_3,CountNodes_4"
5660
pattern: "CountNodes_1"
61+
62+
#raxmlng_ancestral
63+
raxmlng_ancestral_model: "LG4M"
64+
raxml_ancestral_threads: "12"
65+

workflow/Snakefile

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@ configfile: "../config/config.yml"
22

33
rule all:
44
input:
5-
expand("{workdir}/results/{query_fasta}/5_scores/{query_fasta}_wol_param_{pattern}.csv",workdir=config["workdir"], query_fasta=config["query_fasta"],pattern=config["pattern"]),
6-
expand("{workdir}/results/{query_fasta}/5_scores/{query_fasta}_wl_param_{pattern}.csv",workdir=config["workdir"], query_fasta=config["query_fasta"],pattern=config["pattern"]),
5+
expand("{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}_wl_param_{pattern}.csv",workdir=config["workdir"], query_id=config["query_ids"],pattern=config["pattern"]),
76

8-
include : "rules/blastp.smk"
7+
include : "rules/query_fasta.smk"
8+
include : "rules/psiblast.smk"
99
include : "rules/get_blasthits.smk"
10-
include : "rules/header_update.smk"
1110
include : "rules/msa.smk"
1211
include : "rules/trim_msa.smk"
13-
include : "rules/remove_gaps.smk"
1412
include : "rules/ml_tree.smk"
1513
include : "rules/unroot_tree.smk"
16-
include : "rules/run_codeml.smk"
17-
include : "rules/compute_score.smk"
14+
include : "rules/remove_gaps.smk"
15+
include : "rules/raxmlng_ancestral.smk"
16+
include : "rules/compute_raxml_anc_score.smk"

workflow/envs/prune.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
name: ete3
2+
channels:
3+
- etetoolkit
4+
- python
5+
- bioconda
6+
dependencies:
7+
- ete3 =3.1.1
8+
- python =3.7.4

workflow/envs/r-base.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ dependencies:
1111
- r-tidytree = 0.3.3
1212
- r-stringr = 1.4.0
1313
- r-dplyr = 1.0.2
14+
- r-bio3d = 2.4.1

workflow/rules/compute_raxml_anc_score.smk

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
rule raxml_anc_score:
2+
input:
3+
tree_file = "{workdir}/results/{query_id}/4_raxmlng_ancestral/{query_id}.raxml.ancestralTree",
4+
probabilities = "{workdir}/results/{query_id}/4_raxmlng_ancestral/{query_id}.raxml.ancestralProbs",
5+
output:
6+
"{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}_wol_param_{pattern}.csv",
7+
"{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}_wl_param_{pattern}.csv",
8+
params:
9+
out = "{workdir}/results/{query_id}/5_raxmlng_ancestral_scores/{query_id}",
10+
fasta = "{workdir}/results/{query_id}/2_msa/{query_id}_nogap_msa.fasta",
11+
query_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}.fasta"
12+
log:
13+
"{workdir}/workflow/logs/rules/{query_id}_raxmlanc_{pattern}_compute_score.err"
14+
benchmark:
15+
"{workdir}/workflow/logs/benchmarks/{query_id}_raxmlanc_{pattern}_compute_score.out"
16+
cache: True
17+
conda:
18+
"../envs/r-base.yml"
19+
shell:
20+
"query=`python scripts/get_query.py {params.query_fasta}` && Rscript scripts/compute_score_RaxmlNg.R {input.tree_file} {input.probabilities} {params.fasta} {params.out} $query {config[weights]} 2>{log}"

workflow/rules/get_blasthits.smk

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
rule get_blasthits:
22
input:
3-
blastp_out = "{workdir}/results/{query_fasta}/1_blastp/{query_fasta}_blasthits.out",
4-
blastdb = expand("{workdir}/{blastdb_folder}/{blastdb}", workdir=config["workdir"], blastdb_folder=config["blastdb_folder"], blastdb=config["blastdb_file"]),
5-
query_fasta = "{workdir}/resources/query_fasta/{query_fasta}.fasta",
3+
blastp_out = "{workdir}/results/{query_id}/1_psiblast/{query_id}_blasthits.out",
4+
query_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}.fasta",
5+
blastdb = expand("{workdir}/{blastdb_folder}/{blastdb}", workdir=config["workdir"], blastdb_folder=config["blastdb_folder"], blastdb=config["blastdb_file"])
66
output:
7-
"{workdir}/results/{query_fasta}/1_blastp/{query_fasta}_blasthits.fasta",
7+
blast_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}_blasthits.fasta",
88
resources:
99
time_min=300
1010
conda:
1111
"../envs/python.yml"
1212
log:
13-
"{workdir}/workflow/logs/rules/{query_fasta}_get_blasthits.err"
13+
"{workdir}/workflow/logs/rules/{query_id}_get_blasthits.err"
1414
benchmark:
15-
"{workdir}/workflow/logs/benchmarks/{query_fasta}_get_blasthits.out"
15+
"{workdir}/workflow/logs/benchmarks/{query_id}_get_blasthits.out"
1616
cache: True
1717
shell:
18-
"python3 scripts/parse_blastp.py {input.blastp_out} {config[blast_hit_number]} {config[max_e_value]} {config[min_identity]} {config[max_identity]} {input.blastdb} {input.query_fasta} 2> {log}"
18+
"python3 scripts/parse_blastp.py {input.blastp_out} {config[blast_hit_number]} {config[max_e_value]} {config[min_identity]} {input.blastdb} {input.query_fasta} 2> {log}"

workflow/rules/ml_tree.smk

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
rule ml_tree:
22
input:
3-
trimmed_msa = "{workdir}/results/{query_fasta}/2_msa/{query_fasta}_trimmed_msa.fasta",
3+
trimmed_msa = "{workdir}/results/{query_id}/2_msa/{query_id}_trimmed_msa.fasta",
44
output:
5-
bestTree = "{workdir}/results/{query_fasta}/3_mltree/{query_fasta}.raxml.bestTree",
5+
bestTree = "{workdir}/results/{query_id}/3_mltree/{query_id}.raxml.bestTree"
66
params:
7-
raxml_out_name = "{query_fasta}",
7+
raxml_out_name = "{query_id}",
88
conda:
99
"../envs/raxml-ng.yml"
10+
log:
11+
"{workdir}/workflow/logs/rules/{query_id}_mltree.err"
1012
benchmark:
11-
"{workdir}/workflow/logs/benchmarks/{query_fasta}_ml_tree.out"
13+
"{workdir}/workflow/logs/benchmarks/{query_id}_ml_tree.out"
1214
cache: True
1315
resources:
1416
time_min=7200,cpus=16
1517
shell:
16-
"scripts/raxml-ng.sh {input.trimmed_msa} {config[raxml_model]} {params.raxml_out_name} {config[raxml_seed]} {output.bestTree} {config[raxml_tree_number]}"
18+
"scripts/raxml-ng.sh {input.trimmed_msa} {config[raxml_model]} {params.raxml_out_name} {config[raxml_seed]} {output.bestTree} {config[raxml_tree_number]} 2>{log}"

workflow/rules/msa.smk

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
rule msa:
22
input:
3-
fasta = "{workdir}/results/{query_fasta}/1_blastp/{query_fasta}_blasthits_new_header.fasta",
3+
fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}_blasthits.fasta",
44
output:
5-
msa_file = "{workdir}/results/{query_fasta}/2_msa/{query_fasta}_blasthits_new_header_msa.fasta",
5+
msa_file = "{workdir}/results/{query_id}/2_msa/{query_id}_blasthits_msa.fasta",
66
conda:
77
"../envs/mafft.yml"
88
log:
9-
"{workdir}/workflow/logs/rules/{query_fasta}_msa.err"
9+
"{workdir}/workflow/logs/rules/{query_id}_msa.err"
1010
benchmark:
11-
"{workdir}/workflow/logs/benchmarks/{query_fasta}_msa.out"
11+
"{workdir}/workflow/logs/benchmarks/{query_id}_msa.out"
1212
cache: True
1313
resources:
1414
cpus=8

workflow/rules/psiblast.smk

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
rule psiblastp:
2+
input:
3+
blastdb = expand("{workdir}/{blastdb_folder}/{blastdb}", workdir=config["workdir"], blastdb_folder=config["blastdb_folder"], blastdb=config["blastdb_file"]),
4+
query_fasta = "{workdir}/results/{query_id}/1_psiblast/{query_id}.fasta"
5+
output:
6+
outfile = "{workdir}/results/{query_id}/1_psiblast/{query_id}_blasthits.out"
7+
log:
8+
"{workdir}/workflow/logs/rules/{query_id}_psiblastp.err"
9+
benchmark:
10+
"{workdir}/workflow/logs/benchmarks/{query_id}_psiblastp.out"
11+
cache: True
12+
conda:
13+
"../envs/blastp.yml"
14+
shell:
15+
"psiblast -query {input.query_fasta} -db {input.blastdb} -outfmt {config[outfmt]} -out {output.outfile} -max_target_seqs {config[max_target_seqs]} -num_iterations {config[num_iterations]} 2> {log}"

0 commit comments

Comments
 (0)