Skip to content

Commit 3c5ce51

Browse files
committed
Use vcf-files instead of bed files for all downstream analysis.
1 parent aa47376 commit 3c5ce51

File tree

2 files changed

+41
-50
lines changed

2 files changed

+41
-50
lines changed

main.nf

Lines changed: 40 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,9 @@ else {
6868

6969
process manta {
7070
input:
71-
file 'bamfile_tmp' from bamfile
71+
file 'bamfile' from bamfile
7272
file 'bamfile.bai' from bamfile_index
7373
output:
74-
file 'manta.bed' into manta_bed
7574
file 'manta.vcf' into manta_vcf
7675

7776
publishDir params.outdir, mode: 'copy'
@@ -80,7 +79,7 @@ process manta {
8079
module "$params.modules.manta"
8180

8281
errorStrategy { task.exitStatus == 143 ? 'retry' : 'terminate' }
83-
time { params.long_job * 2**task.attempt }
82+
time { params.long_job * 2**(task.attempt-1) }
8483
maxRetries 3
8584
queue 'core'
8685
cpus 4
@@ -95,7 +94,6 @@ process manta {
9594
mv results/variants/diploidSV.vcf.gz ../manta.vcf.gz
9695
cd ..
9796
gunzip -c manta.vcf.gz > manta.vcf
98-
SVvcf2bed.pl manta.vcf > manta.bed
9997
"""
10098
}
10199

@@ -112,7 +110,6 @@ if (!params.fastq) {
112110
process create_fastq {
113111
input:
114112
file 'bamfile' from bamfile
115-
116113
output:
117114
file 'fastq.fq.gz' into fastq
118115

@@ -141,7 +138,6 @@ process fermikit {
141138
input:
142139
file 'sample.fq.gz' from fastq
143140
output:
144-
file 'fermikit.bed' into fermi_bed
145141
file 'fermikit.vcf' into fermi_vcf
146142

147143
publishDir params.outdir, mode: 'copy'
@@ -162,33 +158,29 @@ process fermikit {
162158
bash calling.sh
163159
vcf-sort -c sample.sv.vcf.gz > fermikit.vcf
164160
bgzip -c fermikit.vcf > fermikit.vcf.gz
165-
SVvcf2bed.pl fermikit.vcf > fermikit.bed
166161
"""
167162
}
168163

169164

170-
171165
// 3. Create summary files
172166

173167
// Collect vcfs and beds into one channel
174-
beds = manta_bed.mix( fermi_bed )
175168
vcfs = manta_vcf.mix( fermi_vcf )
176169

177-
178170
mask_files = [
179171
"$baseDir/data/ceph18.b37.lumpy.exclude.2014-01-15.bed",
180172
"$baseDir/data/LCR-hs37d5.bed.gz"
181173
]
182174

183175
masks = mask_files.collect { file(it) }.channel()
184176
// Collect both bed files and combine them with the mask files
185-
beds.spread( masks.buffer(size: 2) ).set { mask_input }
177+
vcfs.tap { vcfs }.spread( masks.buffer(size: 2) ).set { mask_input }
186178

187179
process mask_beds {
188180
input:
189-
set file(bedfile), file(mask1), file(mask2) from mask_input
181+
set file(svfile), file(mask1), file(mask2) from mask_input
190182
output:
191-
file '*_masked.bed' into masked_beds
183+
file '*_masked.vcf' into masked_vcfs
192184

193185
publishDir params.outdir, mode: 'copy'
194186

@@ -201,29 +193,30 @@ process mask_beds {
201193
module "$params.modules.bedtools"
202194

203195
"""
204-
BNAME=\$( echo $bedfile | cut -d. -f1 )
205-
MASK_FILE=\${BNAME}_masked.bed
206-
cat $bedfile \
207-
| bedtools intersect -v -a stdin -b $mask1 -f 0.25 \
208-
| bedtools intersect -v -a stdin -b $mask2 -f 0.25 > \$MASK_FILE
196+
BNAME=\$( echo $svfile | cut -d. -f1 )
197+
MASK_FILE=\${BNAME}_masked.vcf
198+
cat $svfile \
199+
| bedtools intersect -header -v -a stdin -b $mask1 -f 0.25 \
200+
| bedtools intersect -header -v -a stdin -b $mask2 -f 0.25 > \$MASK_FILE
209201
"""
210202
}
211203

212204

213205
// To make intersect files we need to combine them into one channel with
214-
// toList(). And also figure out if we have one or two files, therefore the
215-
// tap and count_beds.
216-
masked_beds.tap { count_beds_tmp }
217-
.tap { masked_beds }
218-
.toList().set { intersect_input }
219-
count_beds_tmp.count().set { count_beds }
206+
// toSortedList() (fermi is before manta in alphabet). And also figure out if we
207+
// have one or two files, therefore the tap and count_vcfs.
208+
masked_vcfs.tap { count_vcfs_tmp }
209+
.tap { masked_vcfs }
210+
.toSortedList().set { intersect_input }
211+
count_vcfs_tmp.count().set { count_vcfs }
220212

221213
process intersect_files {
222214
input:
223-
set file(bed1), file(bed2) from intersect_input
224-
val nbeds from count_beds
215+
set file(fermi_vcf), file(manta_vcf) from intersect_input
216+
val nvcfs from count_vcfs
225217
output:
226-
file "combined_masked.bed" into intersections
218+
file "combined_masked.vcf" into intersections
219+
file "combined_masked*.vcf"
227220

228221
publishDir params.outdir, mode: 'copy'
229222

@@ -235,33 +228,28 @@ process intersect_files {
235228
module 'bioinfo-tools'
236229
module "$params.modules.bedtools"
237230

238-
when: nbeds == 2
231+
when: nvcfs == 2
239232

240233
script:
241234
"""
242-
## In case grep doesn't find anything it will exit with non-zero exit
243-
## status, which will cause slurm to abort the job, we want to continue on
244-
## error here.
245-
set +e
246-
247-
## Create intersected bed files
235+
## Create intersected vcf files
248236
for WORD in DEL INS DUP; do
249-
intersectBed -a <( grep -w \$WORD $bed1 ) -b <( grep -w \$WORD $bed2 ) \
237+
intersectBed -a <( grep -w "^#.\\+\\|\$WORD" $fermi_vcf) \
238+
-b <( grep -w "^#.\\+\\|\$WORD" $manta_vcf) \
250239
-f 0.5 -r \
251-
| sort -k1,1V -k2,2n > combined_masked_\${WORD,,}.bed
240+
| sort -k1,1V -k2,2n > combined_masked_\${WORD,,}.vcf
252241
done
253242
254-
cat <( grep -v -w 'DEL\\|INS\\|DUP' $bed1 ) \
255-
<( grep -v -w 'DEL\\|INS\\|DUP' $bed2 ) \
256-
| sort -k1,1V -k2,2n > combined_masked_OTHER.bed
243+
cat <( grep -v -w '^#.\\+\\|DEL\\|INS\\|DUP' $fermi_vcf ) \
244+
<( grep -v -w '^#.\\+\\|DEL\\|INS\\|DUP' $manta_vcf ) \
245+
| cut -f 1-8 \
246+
| sort -k1,1V -k2,2n > combined_masked_OTHER.vcf
257247
258-
sort -k1,1V -k2,2n combined_masked_*.bed >> combined_masked.bed
259-
260-
set -e # Restore exit-settings
248+
sort -k1,1V -k2,2n combined_masked_*.vcf >> combined_masked.vcf
261249
"""
262250
}
263251

264-
annotate_files = intersections.flatten().mix( masked_beds.tap { masked_beds } )
252+
annotate_files = intersections.flatten().mix( masked_vcfs.tap { masked_vcfs } )
265253

266254
process variant_effect_predictor {
267255
input:
@@ -294,6 +282,12 @@ process variant_effect_predictor {
294282
exit 1;;
295283
esac
296284
285+
## If the input file is empty, just copy
286+
if [ \$( wc -l "\$infile" | awk '{print \$1}' ) -eq 0 ]; then
287+
cp "\$infile" "\$outfile"
288+
exit
289+
fi
290+
297291
variant_effect_predictor.pl \
298292
-i "\$infile" \
299293
--format "\$format" \
@@ -317,11 +311,10 @@ process variant_effect_predictor {
317311
"""
318312
}
319313

320-
process snpEff() {
314+
process snpEff {
321315
input:
322-
file vcf from vcfs_snpeff
316+
file vcf from annotate_files.tap { annotate_files }
323317
output:
324-
file '*snpeff_genes.txt'
325318
file '*.snpeff'
326319

327320
publishDir params.outdir, mode: 'copy'
@@ -355,10 +348,8 @@ process snpEff() {
355348
sed 's/ID=AD,Number=./ID=AD,Number=R/' "\$vcf" \
356349
| vt decompose -s - \
357350
| vt normalize -r $params.ref_fasta - \
358-
| java -Xmx7G -jar "\$SNPEFFJAR" -formatEff -classic GRCh37.75 \
351+
| java -Xmx7G -jar "\$snpeffjar" -formatEff -classic GRCh37.75 \
359352
> "\$vcf.snpeff"
360-
361-
cp snpEff_genes.txt "\$vcf.snpeff_genes.txt"
362353
"""
363354
}
364355

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ params {
1010
fermikit = 'fermikit/r178'
1111
vcftools = "vcftools/0.1.14"
1212
tabix = "tabix/0.2.6"
13-
bedtools = "BEDTools/2.23.0"
13+
bedtools = "BEDTools/2.25.0"
1414
vep = "vep/84"
1515
snpeff = "snpEff/4.2"
1616
}

0 commit comments

Comments
 (0)