diff --git a/.test/config/config.yml b/.test/config/config.yml index dd50d27..2287ae7 100644 --- a/.test/config/config.yml +++ b/.test/config/config.yml @@ -33,3 +33,7 @@ panaroo: fastani: skip: False extra: "" + +rgi: + skip: False + extra: "--clean --alignment_tool DIAMOND" diff --git a/README.md b/README.md index def7e9b..7cc520d 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,10 @@ _Workflow overview:_ 1. NCBI's Prokaryotic Genome Annotation Pipeline ([PGAP](https://github.com/ncbi/pgap)). Note: needs to be installed manually 2. [prokka](https://github.com/tseemann/prokka), a fast and light-weight prokaryotic annotation tool 3. [bakta](https://github.com/oschwengers/bakta), a fast, alignment-free annotation tool. Note: Bakta will automatically download its companion database from zenodo (light: 1.5 GB, full: 40 GB) -3. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast) -4. Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/) +3. Predict antimicrobial resistance (AMR) genes using [RGI](https://github.com/arpcard/rgi) +4. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast) +5. Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/) +6. Compute pairwise average nucleotide identity (ANI) between the assemblies using [FastANI](https://github.com/ParBLiSS/FastANI) and plot a phylogenetic tree based on the ANI distances. ## Installation diff --git a/config/README.md b/config/README.md index 43c77d8..bbd83fd 100644 --- a/config/README.md +++ b/config/README.md @@ -7,9 +7,10 @@ A Snakemake workflow for the post-processing of microbial genome assemblies. 1. NCBI's Prokaryotic Genome Annotation Pipeline ([PGAP](https://github.com/ncbi/pgap)). Note: needs to be installed manually 2. [prokka](https://github.com/tseemann/prokka), a fast and light-weight prokaryotic annotation tool 3. [bakta](https://github.com/oschwengers/bakta), a fast, alignment-free annotation tool. Note: Bakta will automatically download its companion database from zenodo (light: 1.5 GB, full: 40 GB) -3. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast) -4. Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/) -5. Compute pairwise average nucleotide identity (ANI) between the assemblies using [FastANI](https://github.com/ParBLiSS/FastANI) and plot a phylogenetic tree based on the ANI distances. +3. Predict antimicrobial resistance (AMR) genes using [RGI](https://github.com/arpcard/rgi) +4. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast) +5. Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/) +6. Compute pairwise average nucleotide identity (ANI) between the assemblies using [FastANI](https://github.com/ParBLiSS/FastANI) and plot a phylogenetic tree based on the ANI distances. ## Running the workflow diff --git a/config/config.yml b/config/config.yml index dd50d27..2287ae7 100644 --- a/config/config.yml +++ b/config/config.yml @@ -33,3 +33,7 @@ panaroo: fastani: skip: False extra: "" + +rgi: + skip: False + extra: "--clean --alignment_tool DIAMOND" diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml index b7f32c9..d333dab 100644 --- a/config/schemas/config.schema.yml +++ b/config/schemas/config.schema.yml @@ -133,6 +133,17 @@ properties: type: string description: Extra command-line arguments for FastANI default: "" + rgi: + type: object + properties: + skip: + type: boolean + description: Whether to skip RGI analysis + default: false + extra: + type: string + description: Extra command-line arguments for RGI + default: "--clean --alignment_tool DIAMOND" required: - samplesheet - tool @@ -143,3 +154,4 @@ required: - quast - panaroo - fastani + - rgi diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 26f2d8d..167dd89 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -50,6 +50,13 @@ def get_panaroo_fasta(wildcards): def get_final_input(wildcards): inputs = [] + for tool in config["tool"]: + inputs += expand( + "results/annotation/{tool}/{sample}/{sample}.{ext}", + tool=tool, + sample=samples.index, + ext=["gff", "fna"], + ) inputs += expand( "results/qc/quast/report.txt", ) @@ -62,6 +69,12 @@ def get_final_input(wildcards): inputs += expand( "results/qc/fastani/summary.txt", ) + if not config["rgi"]["skip"]: + inputs += expand( + "results/qc/rgi/{sample}/result.{ext}", + sample=samples.index, + ext=["txt", "json"], + ) return inputs diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index c146e26..daf902f 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -124,3 +124,20 @@ rule panaroo: {params.extra} \ > {log} 2>&1 """ + + +rule rgi_detection: + input: + fasta=rules.get_fasta.output.fasta, + output: + multiext("results/qc/rgi/{sample}/result", ".txt", ".json"), + log: + "results/qc/rgi/{sample}/result.log", + threads: max(workflow.cores * 0.25, 1) + params: + input_type="contig", + extra=config["rgi"]["extra"], + message: + """--- Running RGI to detect antibiotic resistance genes ---""" + wrapper: + "https://raw.githubusercontent.com/MPUSP/mpusp-snakemake-wrappers/refs/heads/main/rgi"