Skip to content

Commit 493366c

Browse files
committed
Merge remote-tracking branch 'origin/main' into kzm-metabuli-dev
2 parents 4d44164 + 7c73025 commit 493366c

File tree

19 files changed

+93
-19
lines changed

19 files changed

+93
-19
lines changed

docs/assets/tables/all_inputs.tsv

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,6 @@ cg_reorder_matrix disk_size Int Amount of storage (in GB) to allocate to the tas
474474
cg_reorder_matrix docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/mykrobe:0.12.1 Optional Snippy_Tree docker
475475
cg_reorder_matrix memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Snippy_Tree runtime
476476
cg_reorder_matrix outgroup_root String Tip name to root phylogenetic tree upon (overrides midpoint root) Optional Snippy_Tree general
477-
cg_snp_dists cpu Int Number of CPUs to allocate to the task 1 Optional Snippy_Tree runtime
478-
cg_snp_dists disk_size Int Amount of storage (in GB) to allocate to the task 50 Optional Snippy_Tree runtime
479-
cg_snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Snippy_Tree runtime
480477
checkv_consensus checkv_db File CheckV database file gs://theiagen-public-resources-rp/reference_data/databases/checkv/checkv-db-v1.5.tar.gz Optional TheiaViral_ONT database
481478
checkv_consensus cpu Int Number of CPUs to allocate to the task 2 Optional TheiaViral_Illumina_PE, TheiaViral_ONT runtime
482479
checkv_consensus disk_size Int Amount of storage (in GB) to allocate to the task 100 Optional TheiaViral_Illumina_PE, TheiaViral_ONT runtime
@@ -628,7 +625,7 @@ core_reorder_matrix outgroup_root String Tip name to root phylogenetic tree upon
628625
core_snp_dists cpu Int Number of CPUs to allocate to the task 1 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
629626
core_snp_dists disk_size Int Amount of storage (in GB) to allocate to the task 50 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
630627
core_snp_dists docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/snp-dists:0.8.2 Optional Core_Gene_SNP, kSNP3, kSNP4 docker
631-
core_snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
628+
core_snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 4 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
632629
core_snp_sites cpu Int Number of CPUs to allocate to the task 1 Optional Core_Gene_SNP runtime
633630
core_snp_sites disk_size Int Amount of storage (in GB) to allocate to the task 100 Optional Core_Gene_SNP runtime
634631
core_snp_sites docker String Docker image to use for the task us-docker.pkg.dev/general-theiagen/staphb/snp-sites:2.5.1 Optional Core_Gene_SNP docker
@@ -1851,7 +1848,7 @@ pan_reorder_matrix outgroup_root String Tip name to root phylogenetic tree upon
18511848
pan_snp_dists cpu Int Number of CPUs to allocate to the task 1 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
18521849
pan_snp_dists disk_size Int Amount of storage (in GB) to allocate to the task 50 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
18531850
pan_snp_dists docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/snp-dists:0.8.2 Optional Core_Gene_SNP, kSNP3, kSNP4 docker
1854-
pan_snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
1851+
pan_snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 4 Optional Core_Gene_SNP, kSNP3, kSNP4 runtime
18551852
pangolin4 analysis_mode String Used to switch between usher and pangolearn analysis modes. Only use usher because pangolearn is no longer supported as of Pangolin v4.3 and higher versions. Optional Pangolin_Update general
18561853
pangolin4 cpu Int Number of CPUs to allocate to the task 4 Optional Pangolin_Update runtime
18571854
pangolin4 disk_size Int Amount of storage (in GB) to allocate to the task 100 Optional Pangolin_Update runtime
@@ -2268,7 +2265,10 @@ snippy_tree_wf snippy_core_disk_size Int Amount of storage (in GB) to allocate t
22682265
snippy_tree_wf snippy_core_docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/snippy:4.6.0 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree docker
22692266
snippy_tree_wf snippy_core_memory Int Amount of memory/RAM (in GB) to allocate to the task 16 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree runtime
22702267
snippy_tree_wf snippy_variants_qc_metrics Array[File] Files produced by the Snippy_Variants workflow used to concatenate the reports for each sample in the tree Optional Snippy_Tree general
2268+
snippy_tree_wf snp_dists_cpu Int Number of CPUs to allocate to the task 1 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree runtime
2269+
snippy_tree_wf snp_dists_disk_size Int Amount of storage (in GB) to allocate to the task 50 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree runtime
22712270
snippy_tree_wf snp_dists_docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/snp-dists:0.8.2 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree docker
2271+
snippy_tree_wf snp_dists_memory Int Amount of memory/RAM (in GB) to allocate to the task 4 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree runtime
22722272
snippy_tree_wf snp_sites_cpu Int Number of CPUs to allocate to the task 1 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree runtime
22732273
snippy_tree_wf snp_sites_disk_size Int Amount of storage (in GB) to allocate to the task 100 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree runtime
22742274
snippy_tree_wf snp_sites_docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/snp-sites:2.5.1 Optional Snippy_Streamline, Snippy_Streamline_FASTA, Snippy_Tree docker
@@ -2293,7 +2293,7 @@ snippy_variants_wf read2 File FASTQ file containing read2 sequences Optional Sn
22932293
snp_dists cpu Int Number of CPUs to allocate to the task 1 Optional Augur runtime
22942294
snp_dists disk_size Int Amount of storage (in GB) to allocate to the task 50 Optional Augur runtime
22952295
snp_dists docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/snp-dists:0.8.2 Optional Augur docker
2296-
snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Augur runtime
2296+
snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 4 Optional Augur runtime
22972297
sort_bam_assembly_correction cpu Int Number of CPUs to allocate to the task 2 Optional TheiaMeta_Illumina_PE runtime
22982298
sort_bam_assembly_correction disk_size Int Amount of storage (in GB) to allocate to the task 100 Optional TheiaMeta_Illumina_PE runtime
22992299
sort_bam_assembly_correction docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/samtools:1.17 Optional TheiaMeta_Illumina_PE docker
@@ -2798,9 +2798,6 @@ wg_reorder_matrix disk_size Int Amount of storage (in GB) to allocate to the tas
27982798
wg_reorder_matrix docker String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/staphb/mykrobe:0.12.1 Optional Snippy_Tree docker
27992799
wg_reorder_matrix memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Snippy_Tree runtime
28002800
wg_reorder_matrix outgroup_root String Tip name to root phylogenetic tree upon (overrides midpoint root) Optional Snippy_Tree general
2801-
wg_snp_dists cpu Int Number of CPUs to allocate to the task 1 Optional Snippy_Tree runtime
2802-
wg_snp_dists disk_size Int Amount of storage (in GB) to allocate to the task 50 Optional Snippy_Tree runtime
2803-
wg_snp_dists memory Int Amount of memory/RAM (in GB) to allocate to the task 2 Optional Snippy_Tree runtime
28042801
zip_files cpu Int Number of CPUs to allocate to the task 2 Optional Zip_Column_Content runtime
28052802
zip_files disk_size Int Amount of storage (in GB) to allocate to the task 100 Optional Zip_Column_Content runtime
28062803
zip_files docker_image String The Docker container to use for the task us-docker.pkg.dev/general-theiagen/theiagen/utility:1.1 Optional Zip_Column_Content docker

docs/assets/tables/all_outputs.tsv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ aligned_fai File Index file for the reference genome Clair3_Variants_ONT
4040
alignment_method String The method used to generate the alignment Freyja_FASTQ
4141
amr_results_csv File CSV formatted AMR profile AMR_Search
4242
amr_results_pdf File PDF formatted AMR profile AMR_Search
43+
amr_search_all_resistances String All resistances returned by AMRsearch AMR_Search, TheiaEuk_Illumina_PE, TheiaEuk_ONT, TheiaProk_FASTA, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE, TheiaProk_ONT
44+
amr_search_associated_resistances String Resistances paired with their agent returned by AMRsearch AMR_Search, TheiaEuk_Illumina_PE, TheiaEuk_ONT, TheiaProk_FASTA, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE, TheiaProk_ONT
4345
amr_search_csv File CSV formatted AMR profile TheiaEuk_Illumina_PE, TheiaEuk_ONT, TheiaProk_FASTA, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE, TheiaProk_ONT
4446
amr_search_docker String Docker image used to run AMR_Search AMR_Search, TheiaEuk_Illumina_PE, TheiaEuk_ONT, TheiaProk_FASTA, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE, TheiaProk_ONT
4547
amr_search_results File JSON formatted AMR profile including BLAST results AMR_Search, TheiaEuk_Illumina_PE, TheiaEuk_ONT, TheiaProk_FASTA, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE, TheiaProk_ONT
@@ -384,6 +386,7 @@ fastqc_raw2_html File An HTML file that provides a graphical visualization of ra
384386
fastqc_version String Version of fastqc software used Freyja_FASTQ, TheiaCoV_Illumina_PE, TheiaCoV_Illumina_SE, TheiaEuk_Illumina_PE, TheiaMeta_Illumina_PE, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE
385387
fetch_srr_accession_analysis_date String The date the fetch_srr_accession analysis was run. Fetch_SRR_Accession
386388
fetch_srr_accession_version String The version of the fetch_srr_accession workflow. Fetch_SRR_Accession
389+
file_translations File A tracking file to use for referencing original filenames and paths when identical files are indexed. Zip_Column_Content
387390
filtered_contigs_metrics File File containing metrics of contigs filtered TheiaEuk_Illumina_PE, TheiaEuk_ONT, TheiaProk_Illumina_PE, TheiaProk_Illumina_SE, TheiaProk_ONT
388391
flu_A_315675_resistance String resistance mutations to A_315675 TheiaCoV_FASTA, TheiaCoV_Illumina_PE, TheiaCoV_ONT
389392
flu_amantadine_resistance String resistance mutations to amantadine TheiaCoV_FASTA, TheiaCoV_Illumina_PE, TheiaCoV_ONT

docs/assets/tables/all_workflows.tsv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@ Name Description Applicable Kingdom Workflow Level Workflow Type Command-line Co
4949
[**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md) Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location [Any taxa](../../workflows_overview/workflows_kingdom.md#any-taxa) Set-level [Exporting Data from Terra](../../workflows_overview/workflows_type.md#exporting-data-from-terra) Yes v1.3.0 [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info)
5050
[**Usher**](../workflows/phylogenetic_placement/usher.md) Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree Monkeypox virus, SARS-CoV-2, [Viral](../../workflows_overview/workflows_kingdom.md#viral) Sample-level, Set-level [Phylogenetic Placement](../../workflows_overview/workflows_type.md#phylogenetic-placement) Yes v2.1.0 [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info)
5151
[**VADR_Update**](../workflows/genomic_characterization/vadr_update.md) Update VADR assignments HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, [Viral](../../workflows_overview/workflows_kingdom.md#viral), WNV Sample-level [Genomic Characterization](../../workflows_overview/workflows_type.md#genomic-characterization) Yes v4.0.0 [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info)
52-
[**Zip_Column_Content**](../workflows/data_export/zip_column_content.md) Zip contents of a specified Terra data table column for many samples ("entities") [Any taxa](../../workflows_overview/workflows_kingdom.md#any-taxa) Set-level [Exporting Data from Terra](../../workflows_overview/workflows_type.md#exporting-data-from-terra) Yes v2.1.0 [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info)
52+
[**Zip_Column_Content**](../workflows/data_export/zip_column_content.md) Zip contents of a specified Terra data table column for many samples ("entities") [Any taxa](../../workflows_overview/workflows_kingdom.md#any-taxa) Set-level [Exporting Data from Terra](../../workflows_overview/workflows_type.md#exporting-data-from-terra) Yes vX.X.X [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info)

docs/workflows/data_export/zip_column_content.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ title: Zip_Column_Content
1010

1111
This workflow will create a zip file containing all of the items from a given column in a Terra Data Table. This is useful when you want to share a collection of result files.
1212

13+
If the column contains files that share the same filename, a numeric index is appended to each duplicate filename so that no file is overwritten. The original file paths and their new names inside the zip directory are recorded in the `file_translations.tsv` output.
14+
1315
### Inputs
1416

1517
This workflow runs on the _set_ level.

tasks/gene_typing/drug_resistance/task_amr_search.wdl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,30 @@ task amr_search {
2828
python3 /scripts/parse_amr_json.py \
2929
./~{samplename}_paarsnp_results.jsn \
3030
~{samplename}
31+
32+
# Fix carriage return characters
33+
sed -i 's/\r$//' "~{samplename}_amr_results.csv"
34+
35+
# Collect all unique resistances into a sorted, comma-separated string, similar to AMRFinder
36+
awk -F ',' '/Resistant/ {gsub(/; /, "\n", $3); print $3}' "~{samplename}_amr_results.csv" | sort -u | paste -sd ',' - > RESISTANCES
37+
38+
# Paired resistances with agent
39+
# Place into Agent_1: gene/mutation, gene/mutation; Agent_2: gene/mutation, gene/mutation; format
40+
awk -F ',' '/Resistant/ {gsub("; ", ", ", $3); printf "%s: %s; ", $1, $3}' "~{samplename}_amr_results.csv" | sed 's/; $//' > ASSOCIATED_RESISTANCES
41+
42+
if [[ ! -s RESISTANCES || "$(cat RESISTANCES)" == "none" ]]; then
43+
echo "No resistances reported" > RESISTANCES
44+
echo "No resistances reported" > ASSOCIATED_RESISTANCES
45+
fi
46+
3147
>>>
3248
output {
3349
File amr_search_json_output = "~{samplename}_paarsnp_results.jsn"
3450
File amr_search_output_csv = "~{samplename}_amr_results.csv"
3551
File amr_search_output_pdf = "~{samplename}_amr_results.pdf"
3652
String amr_search_version = read_string("output_amr_version.txt")
53+
String amr_search_all_resistances = read_string("RESISTANCES")
54+
String amr_search_associated_resistances = read_string("ASSOCIATED_RESISTANCES")
3755
String amr_search_docker_image = docker
3856
}
3957

tasks/phylogenetic_inference/utilities/task_snp_dists.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ task snp_dists {
66
String cluster_name
77
Int disk_size = 50
88
String docker = "us-docker.pkg.dev/general-theiagen/staphb/snp-dists:0.8.2"
9-
Int memory = 2
9+
Int memory = 4
1010
Int cpu = 1
1111
}
1212
command <<<

tasks/utilities/file_handling/task_zip_files.wdl

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,45 @@ task zip_files {
1616
command <<<
1717
file_array=(~{sep=' ' files_to_zip})
1818
mkdir ~{zipped_file_name}
19+
echo -e "origin_path\tnew_path" > file_translations.tsv
1920
20-
# move files oto a single directory before zipping
21-
for index in ${!file_array[@]}; do
22-
file=${file_array[$index]}
23-
mv ${file} ~{zipped_file_name}
21+
# move files into a single directory before zipping
22+
for file in "${file_array[@]}"; do
23+
24+
echo "DEBUG: Pulling $file"
25+
if [ -f "$file" ]; then
26+
echo "DEBUG: $file exists"
27+
filename=$(basename "$file") # Extract the filename (e.g., test.tsv)
28+
dest="~{zipped_file_name}/$filename"
29+
30+
# The counter restarts at 1 for every file; the loop below increments it until
31+
# an unused name is found, so any number of duplicated filenames is handled.
32+
counter=1
33+
34+
echo "DEBUG: Checking for $file in $dest"
35+
# Check for duplicate files in the destination
36+
while [ -e "$dest" ]; do
37+
echo "DEBUG: Duplicate filename found, adding a file index for differentiation."
38+
dest="~{zipped_file_name}/${filename%.*}_${counter}.${filename##*.}"
39+
echo "DEBUG: New filename ${filename%.*}_${counter}.${filename##*.}"
40+
((counter++))
41+
done
42+
43+
# Move the file to the destination with the new name
44+
# If loop is not entered, filename will remain unchanged.
45+
mv "$file" "$dest"
46+
echo -e "$file\t$dest" >> file_translations.tsv
47+
48+
else
49+
echo "File not found: $file"
50+
fi
2451
done
2552
2653
zip -r ~{zipped_file_name}.zip ~{zipped_file_name}
2754
>>>
2855
output {
2956
File zipped_files = "~{zipped_file_name}.zip"
57+
File file_translations = "file_translations.tsv"
3058
}
3159
runtime {
3260
docker: "~{docker_image}"

tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@
456456
- path: miniwdl_run/wdl/tasks/taxon_id/contamination/task_midas.wdl
457457
md5sum: 481f5fbce5aa1abf93acd912797821cc
458458
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
459-
md5sum: 68ca5ef61b5b66fcae09ce03b4d7de1a
459+
md5sum: e5626b062172e8bff267aa04cdedf5e9
460460
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl
461461
contains: ["version", "QC", "output"]
462462
- path: miniwdl_run/workflow.log

tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@
436436
- path: miniwdl_run/wdl/tasks/taxon_id/contamination/task_midas.wdl
437437
md5sum: 481f5fbce5aa1abf93acd912797821cc
438438
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl
439-
md5sum: 65d2d61e8a83a5b2b37ae50773ab04af
439+
md5sum: 815c06e0ff6c90c172be481684795dda
440440
- path: miniwdl_run/workflow.log
441441
contains: ["wdl", "theiaprok_illumina_se", "NOTICE", "done"]
442442

workflows/phylogenetics/wf_snippy_tree.wdl

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ workflow snippy_tree_wf {
5151
Int? iqtree2_ultrafast_bootstraps
5252
String? iqtree2_model
5353

54+
Int? snp_dists_cpu
55+
Int? snp_dists_memory
56+
Int? snp_dists_disk_size
5457
String? snp_dists_docker
5558

5659
Int? snp_sites_cpu
@@ -130,7 +133,10 @@ workflow snippy_tree_wf {
130133
input:
131134
alignment = select_first([gubbins.gubbins_polymorphic_fasta, snippy_core.snippy_full_alignment_clean]),
132135
cluster_name = tree_name_updated,
133-
docker = snp_dists_docker
136+
docker = snp_dists_docker,
137+
cpu = snp_dists_cpu,
138+
memory = snp_dists_memory,
139+
disk_size = snp_dists_disk_size
134140
}
135141
# mid-point roots the phylogenetic tree, and reorders the columns in the wgSNP matrix according to the tree tip order
136142
# NB the tree will remain a core genome tree if core_genome = true, and a whole-genome tree if core_genome = false
@@ -148,7 +154,10 @@ workflow snippy_tree_wf {
148154
input:
149155
alignment = select_first([snp_sites.snp_sites_multifasta]),
150156
cluster_name = tree_name_updated,
151-
docker = snp_dists_docker
157+
docker = snp_dists_docker,
158+
cpu = snp_dists_cpu,
159+
memory = snp_dists_memory,
160+
disk_size = snp_dists_disk_size
152161
}
153162
# reorders the columns in the cgSNP matrix according to the tree tip order
154163
# input tree is the midpoint rooted tree from the wg_reorder_matrix task, and midpoint rooting is turned off here, so the tree remains unchanged

0 commit comments

Comments
 (0)