@@ -68,10 +68,9 @@ else {
6868
6969process manta {
7070 input:
71- file ' bamfile_tmp ' from bamfile
71+ file ' bamfile ' from bamfile
7272 file ' bamfile.bai' from bamfile_index
7373 output:
74- file ' manta.bed' into manta_bed
7574 file ' manta.vcf' into manta_vcf
7675
7776 publishDir params. outdir, mode: ' copy'
@@ -80,7 +79,7 @@ process manta {
8079 module " $params . modules . manta "
8180
8281 errorStrategy { task. exitStatus == 143 ? ' retry' : ' terminate' }
83- time { params. long_job * 2 ** task. attempt }
82+ time { params. long_job * 2 ** ( task. attempt-1 ) }
8483 maxRetries 3
8584 queue ' core'
8685 cpus 4
@@ -95,7 +94,6 @@ process manta {
9594 mv results/variants/diploidSV.vcf.gz ../manta.vcf.gz
9695 cd ..
9796 gunzip -c manta.vcf.gz > manta.vcf
98- SVvcf2bed.pl manta.vcf > manta.bed
9997 """
10098}
10199
@@ -112,7 +110,6 @@ if (!params.fastq) {
112110 process create_fastq {
113111 input:
114112 file ' bamfile' from bamfile
115-
116113 output:
117114 file ' fastq.fq.gz' into fastq
118115
@@ -141,7 +138,6 @@ process fermikit {
141138 input:
142139 file ' sample.fq.gz' from fastq
143140 output:
144- file ' fermikit.bed' into fermi_bed
145141 file ' fermikit.vcf' into fermi_vcf
146142
147143 publishDir params. outdir, mode: ' copy'
@@ -162,33 +158,29 @@ process fermikit {
162158 bash calling.sh
163159 vcf-sort -c sample.sv.vcf.gz > fermikit.vcf
164160 bgzip -c fermikit.vcf > fermikit.vcf.gz
165- SVvcf2bed.pl fermikit.vcf > fermikit.bed
166161 """
167162}
168163
169164
170-
171165// 3. Create summary files
172166
173167// Collect vcfs and beds into one channel
174- beds = manta_bed. mix( fermi_bed )
175168vcfs = manta_vcf. mix( fermi_vcf )
176169
177-
178170mask_files = [
179171 " $baseDir /data/ceph18.b37.lumpy.exclude.2014-01-15.bed" ,
180172 " $baseDir /data/LCR-hs37d5.bed.gz"
181173]
182174
183175masks = mask_files. collect { file(it) }. channel()
184176// Collect both bed files and combine them with the mask files
185- beds . spread( masks. buffer(size : 2 ) ). set { mask_input }
177+ vcfs . tap { vcfs } . spread( masks. buffer(size : 2 ) ). set { mask_input }
186178
187179process mask_beds {
188180 input:
189- set file(bedfile ), file(mask1), file(mask2) from mask_input
181+ set file(svfile ), file(mask1), file(mask2) from mask_input
190182 output:
191- file ' *_masked.bed ' into masked_beds
183+ file ' *_masked.vcf ' into masked_vcfs
192184
193185 publishDir params. outdir, mode: ' copy'
194186
@@ -201,29 +193,30 @@ process mask_beds {
201193 module " $params . modules . bedtools "
202194
203195 """
204- BNAME=\$ ( echo $b edfile | cut -d. -f1 )
205- MASK_FILE=\$ {BNAME}_masked.bed
206- cat $b edfile \
207- | bedtools intersect -v -a stdin -b $mask1 -f 0.25 \
208- | bedtools intersect -v -a stdin -b $mask2 -f 0.25 > \$ MASK_FILE
196+ BNAME=\$ ( echo $s vfile | cut -d. -f1 )
197+ MASK_FILE=\$ {BNAME}_masked.vcf
198+ cat $s vfile \
199+ | bedtools intersect -header - v -a stdin -b $mask1 -f 0.25 \
200+ | bedtools intersect -header - v -a stdin -b $mask2 -f 0.25 > \$ MASK_FILE
209201 """
210202}
211203
212204
213205// To make intersect files we need to combine them into one channel with
214- // toList() . And also figure out if we have one or two files, therefore the
215- // tap and count_beds .
216- masked_beds . tap { count_beds_tmp }
217- .tap { masked_beds }
218- .toList (). set { intersect_input }
219- count_beds_tmp . count(). set { count_beds }
206+ // toSortedList() (fermi is before manta in alphabet) . And also figure out if we
207+ // have one or two files, therefore the tap and count_vcfs .
208+ masked_vcfs . tap { count_vcfs_tmp }
209+ .tap { masked_vcfs }
210+ .toSortedList (). set { intersect_input }
211+ count_vcfs_tmp . count(). set { count_vcfs }
220212
221213process intersect_files {
222214 input:
223- set file(bed1 ), file(bed2 ) from intersect_input
224- val nbeds from count_beds
215+ set file(fermi_vcf ), file(manta_vcf ) from intersect_input
216+ val nvcfs from count_vcfs
225217 output:
226- file " combined_masked.bed" into intersections
218+ file " combined_masked.vcf" into intersections
219+ file " combined_masked*.vcf"
227220
228221 publishDir params. outdir, mode: ' copy'
229222
@@ -235,33 +228,28 @@ process intersect_files {
235228 module ' bioinfo-tools'
236229 module " $params . modules . bedtools "
237230
238- when: nbeds == 2
231+ when: nvcfs == 2
239232
240233 script:
241234 """
242- ## In case grep doesn't find anything it will exit with non-zero exit
243- ## status, which will cause slurm to abort the job, we want to continue on
244- ## error here.
245- set +e
246-
247- ## Create intersected bed files
235+ ## Create intersected vcf files
248236 for WORD in DEL INS DUP; do
249- intersectBed -a <( grep -w \$ WORD $bed1 ) -b <( grep -w \$ WORD $bed2 ) \
237+ intersectBed -a <( grep -w "^#.\\ +\\ |\$ WORD" $fermi_vcf ) \
238+ -b <( grep -w "^#.\\ +\\ |\$ WORD" $manta_vcf ) \
250239 -f 0.5 -r \
251- | sort -k1,1V -k2,2n > combined_masked_\$ {WORD,,}.bed
240+ | sort -k1,1V -k2,2n > combined_masked_\$ {WORD,,}.vcf
252241 done
253242
254- cat <( grep -v -w 'DEL\\ |INS\\ |DUP' $bed1 ) \
255- <( grep -v -w 'DEL\\ |INS\\ |DUP' $bed2 ) \
256- | sort -k1,1V -k2,2n > combined_masked_OTHER.bed
243+ cat <( grep -v -w '^#.\\ +\\ |DEL\\ |INS\\ |DUP' $fermi_vcf ) \
244+ <( grep -v -w '^#.\\ +\\ |DEL\\ |INS\\ |DUP' $manta_vcf ) \
245+ | cut -f 1-8 \
246+ | sort -k1,1V -k2,2n > combined_masked_OTHER.vcf
257247
258- sort -k1,1V -k2,2n combined_masked_*.bed >> combined_masked.bed
259-
260- set -e # Restore exit-settings
248+ sort -k1,1V -k2,2n combined_masked_*.vcf >> combined_masked.vcf
261249 """
262250}
263251
264- annotate_files = intersections. flatten(). mix( masked_beds . tap { masked_beds } )
252+ annotate_files = intersections. flatten(). mix( masked_vcfs . tap { masked_vcfs } )
265253
266254process variant_effect_predictor {
267255 input:
@@ -294,6 +282,12 @@ process variant_effect_predictor {
294282 exit 1;;
295283 esac
296284
285+ ## If the input file is empty, just copy
286+ if [ \$ ( wc -l "\$ infile" | awk '{print \$ 1}' ) -eq 0 ]; then
287+ cp "\$ infile" "\$ outfile"
288+ exit
289+ fi
290+
297291 variant_effect_predictor.pl \
298292 -i "\$ infile" \
299293 --format "\$ format" \
@@ -317,11 +311,10 @@ process variant_effect_predictor {
317311 """
318312}
319313
320- process snpEff() {
314+ process snpEff {
321315 input:
322- file vcf from vcfs_snpeff
316+ file vcf from annotate_files . tap { annotate_files }
323317 output:
324- file ' *snpeff_genes.txt'
325318 file ' *.snpeff'
326319
327320 publishDir params. outdir, mode: ' copy'
@@ -355,10 +348,8 @@ process snpEff() {
355348 sed 's/ID=AD,Number=./ID=AD,Number=R/' "\$ vcf" \
356349 | vt decompose -s - \
357350 | vt normalize -r $params . ref_fasta - \
358- | java -Xmx7G -jar "\$ SNPEFFJAR " -formatEff -classic GRCh37.75 \
351+ | java -Xmx7G -jar "\$ snpeffjar " -formatEff -classic GRCh37.75 \
359352 > "\$ vcf.snpeff"
360-
361- cp snpEff_genes.txt "\$ vcf.snpeff_genes.txt"
362353 """
363354}
364355
0 commit comments