@@ -45,13 +45,12 @@ if (!bamindex) {
4545 output:
4646 file ' bamfile.bai' into bamfile_index
4747
48- module ' bioinfo-tools'
49- module " $params . modules . samtools "
50-
51- // We only need one core for this part
5248 executor choose_executor()
5349 queue ' core'
54- time params. short_job
50+ time params. runtime. simple
51+
52+ module ' bioinfo-tools'
53+ module " $params . modules . samtools "
5554
5655 when: ' indexbam' in workflowSteps
5756
@@ -73,24 +72,24 @@ process manta {
7372 output:
7473 file ' manta.vcf' into manta_vcf
7574
76- publishDir params. outdir, mode: ' copy'
77-
78- module ' bioinfo-tools'
79- module " $params . modules . manta "
75+ publishDir params. outdir, mode: ' copy' , saveAs: { " $params . prefix $it " }
8076
8177 errorStrategy { task. exitStatus == 143 ? ' retry' : ' terminate' }
82- time { params. long_job * 2 ** (task. attempt-1 ) }
78+ time { params. runtime . caller * 2 ** (task. attempt-1 ) }
8379 maxRetries 3
8480 queue ' core'
8581 cpus 4
8682
83+ module ' bioinfo-tools'
84+ module " $params . modules . manta "
85+
8786 when: ' manta' in workflowSteps
8887
8988 script:
9089 """
9190 configManta.py --normalBam bamfile --referenceFasta $params . ref_fasta --runDir testRun
9291 cd testRun
93- ./runWorkflow.py -m local -j $p arams . threads
92+ ./runWorkflow.py -m local -j \$ SLURM_CPUS_ON_NODE
9493 mv results/variants/diploidSV.vcf.gz ../manta.vcf.gz
9594 cd ..
9695 gunzip -c manta.vcf.gz > manta.vcf
@@ -113,13 +112,12 @@ if (!params.fastq) {
113112 output:
114113 file ' fastq.fq.gz' into fastq
115114
116- module ' bioinfo-tools'
117- module " $params . modules . samtools "
118-
119- // We only need one core for this part
120115 executor choose_executor()
121116 queue ' core'
122- time params. short_job
117+ time params. runtime. simple
118+
119+ module ' bioinfo-tools'
120+ module " $params . modules . samtools "
123121
124122 when: ' fastq' in workflowSteps
125123
@@ -140,7 +138,12 @@ process fermikit {
140138 output:
141139 file ' fermikit.vcf' into fermi_vcf
142140
143- publishDir params. outdir, mode: ' copy'
141+ publishDir params. outdir, mode: ' copy' , saveAs: { " $params . prefix $it " }
142+
143+ errorStrategy { task. exitStatus == 143 ? ' retry' : ' terminate' }
144+ time { params. runtime. fermikit * 2 ** ( task. attempt - 1 ) }
145+ maxRetries 3
146+ queue ' node'
144147
145148 module ' bioinfo-tools'
146149 module " $params . modules . fermikit "
@@ -152,9 +155,9 @@ process fermikit {
152155
153156 script:
154157 """
155- fermi2.pl unitig -s $p arams . genome_size -t$p arams . threads -l $p arams . readlen -p sample sample.fq.gz > sample.mak
158+ fermi2.pl unitig -s3g -t\$ SLURM_CPUS_ON_NODE -l150 -p sample sample.fq.gz > sample.mak
156159 make -f sample.mak
157- run-calling -t$p arams . threads $params . ref_fasta sample.mag.gz > calling.sh
160+ run-calling -t\$ SLURM_CPUS_ON_NODE $params . ref_fasta sample.mag.gz > calling.sh
158161 bash calling.sh
159162 vcf-sort -c sample.sv.vcf.gz > fermikit.vcf
160163 bgzip -c fermikit.vcf > fermikit.vcf.gz
@@ -182,12 +185,9 @@ process mask_beds {
182185 output:
183186 file ' *_masked.vcf' into masked_vcfs
184187
185- publishDir params. outdir, mode: ' copy'
186-
187- // Does not use many resources, run it locally
188188 executor choose_executor()
189189 queue ' core'
190- time params. short_job
190+ time params. runtime . simple
191191
192192 module ' bioinfo-tools'
193193 module " $params . modules . bedtools "
@@ -203,32 +203,25 @@ process mask_beds {
203203
204204
205205// To make intersect files we need to combine them into one channel with
206- // toSortedList() (fermi is before manta in alphabet). And also figure out if we
207- // have one or two files, therefore the tap and count_vcfs.
208- masked_vcfs. tap { count_vcfs_tmp }
209- .tap { masked_vcfs }
206+ // toSortedList() (fermi is before manta in alphabet).
207+ masked_vcfs. tap { masked_vcfs }
208+ .filter( ~/ manta|fermikit/ )
210209 .toSortedList(). set { intersect_input }
211- count_vcfs_tmp. count(). set { count_vcfs }
212210
213211process intersect_files {
214212 input:
215213 set file(fermi_vcf), file(manta_vcf) from intersect_input
216- val nvcfs from count_vcfs
217214 output:
218215 file " combined_masked.vcf" into intersections
219- file " combined_masked*.vcf"
220216
221- publishDir params. outdir, mode: ' copy'
222-
223- // Does not use many resources, run it locally
224217 executor choose_executor()
225218 queue ' core'
226- time params. short_job
219+ time params. runtime . simple
227220
228221 module ' bioinfo-tools'
229222 module " $params . modules . bedtools "
230223
231- when: nvcfs == 2
224+ when: ' make_intersect ' in workflowSteps
232225
233226 script:
234227 """
@@ -255,13 +248,13 @@ process variant_effect_predictor {
255248 input:
256249 file infile from annotate_files. tap { annotate_files }
257250 output:
258- file ' *.vep' into vep_outfiles
251+ file ' *.vep.vcf '
259252
260- publishDir params. outdir, mode: ' copy'
253+ publishDir params. outdir, mode: ' copy' , saveAs: { " $p arams . prefix $i t " }
261254
262255 executor choose_executor()
263256 queue ' core'
264- time params. short_job
257+ time params. runtime . simple
265258
266259 module ' bioinfo-tools'
267260 module " $params . modules . vep "
@@ -270,28 +263,28 @@ process variant_effect_predictor {
270263
271264 script:
272265 """
273- infile ="$infile "
274- outfile ="\$ infile. vep"
275- vep_cache ="/sw/data/uppnex/vep/84"
276- assembly=" $p arams . vep . assembly "
277-
278- case "\$ infile " in
279- *vcf) format ="vcf" ;;
280- *bed) format ="ensembl" ;;
281- *) printf "Unrecognized format for '%s'" "\$ infile " >&2
266+ INFILE ="$infile "
267+ OUTFILE ="\$ {INFILE%.vcf}. vep.vcf "
268+ VEP_CACHE ="/sw/data/uppnex/vep/84"
269+ ASSEMBLY="GRCh37 "
270+
271+ case "\$ INFILE " in
272+ *vcf) FORMAT ="vcf" ;;
273+ *bed) FORMAT ="ensembl" ;;
274+ *) printf "Unrecognized format for '%s'" "\$ INFILE " >&2
282275 exit 1;;
283276 esac
284277
285- ## If the input file is empty, just copy
286- if [ \$ ( wc -l "\$ infile" | awk '{print \$ 1}' ) -eq 0 ]; then
287- cp "\$ infile " "\$ outfile "
278+ ## If the input file exists but is empty (zero bytes), there is nothing to
## annotate: just copy it to the output name and stop. Note that `-s` is true
## for a NON-empty file, so it has to be negated here.
279+ if [[ -f "\$INFILE" && ! -s "\$INFILE" ]]; then
280+ cp "\$INFILE" "\$OUTFILE"
288281 exit
289282 fi
290283
## Run VEP on the shell variables defined at the top of this script
## (INFILE/OUTFILE/FORMAT/VEP_CACHE); the old lower-case names no longer exist.
291284 variant_effect_predictor.pl \
292285 -i "\$INFILE" \
293- --format "\$ format " \
294- -cache --dir "\$ vep_cache " \
286+ --format "\$FORMAT" \
287+ -cache --dir "\$VEP_CACHE" \
295288 -o "\$OUTFILE" \
296289 --vcf \
297290 --merged \
@@ -306,50 +299,51 @@ process variant_effect_predictor {
306299 --canonical \
307300 --ccds \
308301 --fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE \
309- --assembly "\$ assembly " \
302+ --assembly "\$ ASSEMBLY " \
310303 --offline
311304 """
312305}
313306
314307process snpEff {
315308 input:
316- file vcf from annotate_files. tap { annotate_files }
309+ file infile from annotate_files. tap { annotate_files }
317310 output:
318- file ' *.snpeff'
311+ file ' *.snpeff.vcf '
319312
320- publishDir params. outdir, mode: ' copy'
313+ publishDir params. outdir, mode: ' copy' , saveAs: { " $p arams . prefix $i t " }
321314
322- module ' bioinfo-tools'
323- module " $params . modules . snpeff "
324-
325- // Does not use many resources, run it locally
326315 executor choose_executor()
327316 queue ' core'
328- time params. short_job
317+ time params. runtime. simple
318+
319+ module ' bioinfo-tools'
320+ module " $params . modules . snpeff "
321+ module " $params . modules . vt "
329322
330323 when: ' snpeff' in workflowSteps
331324
332325 script:
333326 """
334- vcf="$vcf " ## Use bash-semantics for variables
335- snpeffjar=''
327+ INFILE="$infile " ## Use bash-semantics for variables
328+ OUTFILE="\$ {INFILE%.vcf}.snpeff.vcf"
329+ SNPEFFJAR=''
336330
337- for p in \$ ( tr ':' ' ' <<<"\$ CLASSPATH" ); do
338- if [ -f "\$ p /snpEff.jar" ]; then
339- snpeffjar ="\$ p /snpEff.jar"
331+ for P in \$ ( tr ':' ' ' <<<"\$ CLASSPATH" ); do
332+ if [ -f "\$ P /snpEff.jar" ]; then
333+ SNPEFFJAR ="\$ P /snpEff.jar"
340334 break
341335 fi
342336 done
343- if [ -z "\$ snpeffjar " ]; then
337+ if [ -z "\$ SNPEFFJAR " ]; then
344338 printf "Can't find snpEff.jar in '%s'" "\$ CLASSPATH" >&2
345339 exit 1
346340 fi
347341
348- sed 's/ID=AD,Number=./ID=AD,Number=R/' "\$ vcf " \
342+ sed 's/ID=AD,Number=./ID=AD,Number=R/' "\$ INFILE " \
349343 | vt decompose -s - \
350344 | vt normalize -r $params . ref_fasta - \
351- | java -Xmx7G -jar "\$ snpeffjar " -formatEff -classic GRCh37.75 \
352- > "\$ vcf.snpeff "
345+ | java -Xmx7G -jar "\$ SNPEFFJAR " -formatEff -classic GRCh37.75 \
346+ > "\$ OUTFILE "
353347 """
354348}
355349
@@ -369,11 +363,10 @@ def usage_message() {
369363 log. info ' --help Show this message and exit'
370364 log. info ' --fastq Input fastqfile (default is bam but with fq as fileending)'
371365 log. info ' --steps Specify what steps to run, comma separated:'
372- log. info ' Callers: manta, fermikit, cnvnator (choose one or many)'
366+ log. info ' Callers: manta, fermikit (choose one or many)'
373367 log. info ' Annotation: vep OR snpeff'
374- log. info ' --long_job Running time for long job (callers, fermi and manta)'
375- log. info ' --short_job Running time for short jobs (bam indexing and bam2fq)'
376368 log. info ' --outdir Directory where resultfiles are stored'
369+ log. info ' --prefix Prefix for result filenames'
377370 log. info ' '
378371}
379372
@@ -439,6 +432,8 @@ def nextflow_running_as_slurmjob() {
439432 return false
440433}
441434
435+ /* If the nextflow daemon is running as a slurm job, we can use the local CPU
436+ * for a lot of our work; otherwise the slurm executor is selected instead */
442437def choose_executor() {
443438 return nextflow_running_as_slurmjob() ? ' local' : ' slurm'
444439}
@@ -462,5 +457,9 @@ def processWorkflowSteps(steps) {
462457 workflowSteps. push( ' fastq' )
463458 }
464459
460+ if (' manta' in workflowSteps && ' fermikit' in workflowSteps) {
461+ workflowSteps. push( ' make_intersect' )
462+ }
463+
465464 return workflowSteps
466465}
0 commit comments