@@ -68,42 +68,32 @@ else {
6868
6969process manta {
7070 input:
71- file ' bamfile_tmp ' from bamfile
71+ file ' bamfile ' from bamfile
7272 file ' bamfile.bai' from bamfile_index
7373 output:
74- file ' manta.bed' into manta_bed
7574 file ' manta.vcf' into manta_vcf
7675
7776 publishDir params. outdir, mode: ' copy'
7877
7978 module ' bioinfo-tools'
8079 module " $params . modules . manta "
8180
81+ errorStrategy { task. exitStatus == 143 ? ' retry' : ' terminate' }
82+ time { params. long_job * 2 ** (task. attempt-1 ) }
83+ maxRetries 3
84+ queue ' core'
85+ cpus 4
86+
8287 when: ' manta' in workflowSteps
8388
8489 script:
8590 """
86- # Manta follows symlinks and expects the index to be with the original
87- # file, so we copy it, and then clean up at script EXIT with a trap.
88- # TODO, this is fixed in manta v0.29.5, https://github.com/Illumina/manta/issues/32
89- DIR=`pwd`
90- function cleanup() {
91- echo "CLEAN UP"
92- cd \$ DIR
93- if [ -f bamfile ]; then
94- rm bamfile
95- fi
96- }
97- trap cleanup EXIT
98- cp bamfile_tmp bamfile
99-
10091 configManta.py --normalBam bamfile --referenceFasta $params . ref_fasta --runDir testRun
10192 cd testRun
10293 ./runWorkflow.py -m local -j $params . threads
10394 mv results/variants/diploidSV.vcf.gz ../manta.vcf.gz
10495 cd ..
10596 gunzip -c manta.vcf.gz > manta.vcf
106- SVvcf2bed.pl manta.vcf > manta.bed
10797 """
10898}
10999
@@ -120,7 +110,6 @@ if (!params.fastq) {
120110 process create_fastq {
121111 input:
122112 file ' bamfile' from bamfile
123-
124113 output:
125114 file ' fastq.fq.gz' into fastq
126115
@@ -149,7 +138,6 @@ process fermikit {
149138 input:
150139 file ' sample.fq.gz' from fastq
151140 output:
152- file ' fermikit.bed' into fermi_bed
153141 file ' fermikit.vcf' into fermi_vcf
154142
155143 publishDir params. outdir, mode: ' copy'
@@ -170,33 +158,29 @@ process fermikit {
170158 bash calling.sh
171159 vcf-sort -c sample.sv.vcf.gz > fermikit.vcf
172160 bgzip -c fermikit.vcf > fermikit.vcf.gz
173- SVvcf2bed.pl fermikit.vcf > fermikit.bed
174161 """
175162}
176163
177164
178-
179165// 3. Create summary files
180166
181167// Collect vcfs and beds into one channel
182- beds = manta_bed. mix( fermi_bed )
183168vcfs = manta_vcf. mix( fermi_vcf )
184169
185-
186170mask_files = [
187171 " $baseDir /data/ceph18.b37.lumpy.exclude.2014-01-15.bed" ,
188172 " $baseDir /data/LCR-hs37d5.bed.gz"
189173]
190174
191175masks = mask_files. collect { file(it) }. channel()
192176// Collect both bed files and combine them with the mask files
193- beds . spread( masks. buffer(size : 2 ) ). set { mask_input }
177+ vcfs . tap { vcfs } . spread( masks. buffer(size : 2 ) ). set { mask_input }
194178
195179process mask_beds {
196180 input:
197- set file(bedfile ), file(mask1), file(mask2) from mask_input
181+ set file(svfile ), file(mask1), file(mask2) from mask_input
198182 output:
199- file ' *_masked.bed ' into masked_beds
183+ file ' *_masked.vcf ' into masked_vcfs
200184
201185 publishDir params. outdir, mode: ' copy'
202186
@@ -209,29 +193,30 @@ process mask_beds {
209193 module " $params . modules . bedtools "
210194
211195 """
212- BNAME=\$ ( echo $b edfile | cut -d. -f1 )
213- MASK_FILE=\$ {BNAME}_masked.bed
214- cat $b edfile \
215- | bedtools intersect -v -a stdin -b $mask1 -f 0.25 \
216- | bedtools intersect -v -a stdin -b $mask2 -f 0.25 > \$ MASK_FILE
196+ BNAME=\$ ( echo $s vfile | cut -d. -f1 )
197+ MASK_FILE=\$ {BNAME}_masked.vcf
198+ cat $s vfile \
199+ | bedtools intersect -header - v -a stdin -b $mask1 -f 0.25 \
200+ | bedtools intersect -header - v -a stdin -b $mask2 -f 0.25 > \$ MASK_FILE
217201 """
218202}
219203
220204
221205// To make intersect files we need to combine them into one channel with
222- // toList() . And also figure out if we have one or two files, therefore the
223- // tap and count_beds .
224- masked_beds . tap { count_beds_tmp }
225- .tap { masked_beds }
226- .toList (). set { intersect_input }
227- count_beds_tmp . count(). set { count_beds }
206+ // toSortedList() (fermi is before manta in alphabet) . And also figure out if we
207+ // have one or two files, therefore the tap and count_vcfs .
208+ masked_vcfs . tap { count_vcfs_tmp }
209+ .tap { masked_vcfs }
210+ .toSortedList (). set { intersect_input }
211+ count_vcfs_tmp . count(). set { count_vcfs }
228212
229213process intersect_files {
230214 input:
231- set file(bed1 ), file(bed2 ) from intersect_input
232- val nbeds from count_beds
215+ set file(fermi_vcf ), file(manta_vcf ) from intersect_input
216+ val nvcfs from count_vcfs
233217 output:
234- file " combined_masked.bed" into intersections
218+ file " combined_masked.vcf" into intersections
219+ file " combined_masked*.vcf"
235220
236221 publishDir params. outdir, mode: ' copy'
237222
@@ -243,33 +228,28 @@ process intersect_files {
243228 module ' bioinfo-tools'
244229 module " $params . modules . bedtools "
245230
246- when: nbeds == 2
231+ when: nvcfs == 2
247232
248233 script:
249234 """
250- ## In case grep doesn't find anything it will exit with non-zero exit
251- ## status, which will cause slurm to abort the job, we want to continue on
252- ## error here.
253- set +e
254-
255- ## Create intersected bed files
235+ ## Create intersected vcf files
256236 for WORD in DEL INS DUP; do
257- intersectBed -a <( grep -w \$ WORD $bed1 ) -b <( grep -w \$ WORD $bed2 ) \
237+ intersectBed -a <( grep -w "^#.\\ +\\ |\$ WORD" $fermi_vcf ) \
238+ -b <( grep -w "^#.\\ +\\ |\$ WORD" $manta_vcf ) \
258239 -f 0.5 -r \
259- | sort -k1,1V -k2,2n > combined_masked_\$ {WORD,,}.bed
240+ | sort -k1,1V -k2,2n > combined_masked_\$ {WORD,,}.vcf
260241 done
261242
262- cat <( grep -v -w 'DEL\\ |INS\\ |DUP' $bed1 ) \
263- <( grep -v -w 'DEL\\ |INS\\ |DUP' $bed2 ) \
264- | sort -k1,1V -k2,2n > combined_masked_OTHER.bed
265-
266- sort -k1,1V -k2,2n combined_masked_*.bed >> combined_masked.bed
243+ cat <( grep -v -w '^#.\\ +\\ |DEL\\ |INS\\ |DUP' $fermi_vcf ) \
244+ <( grep -v -w '^#.\\ +\\ |DEL\\ |INS\\ |DUP' $manta_vcf ) \
245+ | cut -f 1-8 \
246+ | sort -k1,1V -k2,2n > combined_masked_OTHER.vcf
267247
268- set -e # Restore exit-settings
248+ sort -k1,1V -k2,2n combined_masked_*.vcf >> combined_masked.vcf
269249 """
270250}
271251
272- annotate_files = intersections. flatten(). mix( masked_beds . tap { masked_beds } )
252+ annotate_files = intersections. flatten(). mix( masked_vcfs . tap { masked_vcfs } )
273253
274254process variant_effect_predictor {
275255 input:
@@ -302,6 +282,12 @@ process variant_effect_predictor {
302282 exit 1;;
303283 esac
304284
285+ ## If the input file is empty, just copy
286+ if [ \$ ( wc -l "\$ infile" | awk '{print \$ 1}' ) -eq 0 ]; then
287+ cp "\$ infile" "\$ outfile"
288+ exit
289+ fi
290+
305291 variant_effect_predictor.pl \
306292 -i "\$ infile" \
307293 --format "\$ format" \
@@ -325,11 +311,10 @@ process variant_effect_predictor {
325311 """
326312}
327313
328- process snpEff() {
314+ process snpEff {
329315 input:
330- file vcf from vcfs_snpeff
316+ file vcf from annotate_files . tap { annotate_files }
331317 output:
332- file ' *snpeff_genes.txt'
333318 file ' *.snpeff'
334319
335320 publishDir params. outdir, mode: ' copy'
@@ -363,10 +348,8 @@ process snpEff() {
363348 sed 's/ID=AD,Number=./ID=AD,Number=R/' "\$ vcf" \
364349 | vt decompose -s - \
365350 | vt normalize -r $params . ref_fasta - \
366- | java -Xmx7G -jar "\$ SNPEFFJAR " -formatEff -classic GRCh37.75 \
351+ | java -Xmx7G -jar "\$ snpeffjar " -formatEff -classic GRCh37.75 \
367352 > "\$ vcf.snpeff"
368-
369- cp snpEff_genes.txt "\$ vcf.snpeff_genes.txt"
370353 """
371354}
372355
0 commit comments