Skip to content

Commit aa247b6

Browse files
authored
Merge pull request #46 from NBISweden/feature/vcf_files
Feature - vcf-files
2 parents 0743931 + 3c5ce51 commit aa247b6

File tree

2 files changed

+47
-64
lines changed

2 files changed

+47
-64
lines changed

main.nf

Lines changed: 45 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -68,42 +68,32 @@ else {
6868

6969
process manta {
7070
input:
71-
file 'bamfile_tmp' from bamfile
71+
file 'bamfile' from bamfile
7272
file 'bamfile.bai' from bamfile_index
7373
output:
74-
file 'manta.bed' into manta_bed
7574
file 'manta.vcf' into manta_vcf
7675

7776
publishDir params.outdir, mode: 'copy'
7877

7978
module 'bioinfo-tools'
8079
module "$params.modules.manta"
8180

81+
errorStrategy { task.exitStatus == 143 ? 'retry' : 'terminate' }
82+
time { params.long_job * 2**(task.attempt-1) }
83+
maxRetries 3
84+
queue 'core'
85+
cpus 4
86+
8287
when: 'manta' in workflowSteps
8388

8489
script:
8590
"""
86-
# Manta follows symlinks and expects the index to be with the original
87-
# file, so we copy it, and then clean up at script EXIT with a trap.
88-
# TODO, this is fixed in manta v0.29.5, https://github.com/Illumina/manta/issues/32
89-
DIR=`pwd`
90-
function cleanup() {
91-
echo "CLEAN UP"
92-
cd \$DIR
93-
if [ -f bamfile ]; then
94-
rm bamfile
95-
fi
96-
}
97-
trap cleanup EXIT
98-
cp bamfile_tmp bamfile
99-
10091
configManta.py --normalBam bamfile --referenceFasta $params.ref_fasta --runDir testRun
10192
cd testRun
10293
./runWorkflow.py -m local -j $params.threads
10394
mv results/variants/diploidSV.vcf.gz ../manta.vcf.gz
10495
cd ..
10596
gunzip -c manta.vcf.gz > manta.vcf
106-
SVvcf2bed.pl manta.vcf > manta.bed
10797
"""
10898
}
10999

@@ -120,7 +110,6 @@ if (!params.fastq) {
120110
process create_fastq {
121111
input:
122112
file 'bamfile' from bamfile
123-
124113
output:
125114
file 'fastq.fq.gz' into fastq
126115

@@ -149,7 +138,6 @@ process fermikit {
149138
input:
150139
file 'sample.fq.gz' from fastq
151140
output:
152-
file 'fermikit.bed' into fermi_bed
153141
file 'fermikit.vcf' into fermi_vcf
154142

155143
publishDir params.outdir, mode: 'copy'
@@ -170,33 +158,29 @@ process fermikit {
170158
bash calling.sh
171159
vcf-sort -c sample.sv.vcf.gz > fermikit.vcf
172160
bgzip -c fermikit.vcf > fermikit.vcf.gz
173-
SVvcf2bed.pl fermikit.vcf > fermikit.bed
174161
"""
175162
}
176163

177164

178-
179165
// 3. Create summary files
180166

181167
// Collect vcfs and beds into one channel
182-
beds = manta_bed.mix( fermi_bed )
183168
vcfs = manta_vcf.mix( fermi_vcf )
184169

185-
186170
mask_files = [
187171
"$baseDir/data/ceph18.b37.lumpy.exclude.2014-01-15.bed",
188172
"$baseDir/data/LCR-hs37d5.bed.gz"
189173
]
190174

191175
masks = mask_files.collect { file(it) }.channel()
192176
// Collect both bed files and combine them with the mask files
193-
beds.spread( masks.buffer(size: 2) ).set { mask_input }
177+
vcfs.tap { vcfs }.spread( masks.buffer(size: 2) ).set { mask_input }
194178

195179
process mask_beds {
196180
input:
197-
set file(bedfile), file(mask1), file(mask2) from mask_input
181+
set file(svfile), file(mask1), file(mask2) from mask_input
198182
output:
199-
file '*_masked.bed' into masked_beds
183+
file '*_masked.vcf' into masked_vcfs
200184

201185
publishDir params.outdir, mode: 'copy'
202186

@@ -209,29 +193,30 @@ process mask_beds {
209193
module "$params.modules.bedtools"
210194

211195
"""
212-
BNAME=\$( echo $bedfile | cut -d. -f1 )
213-
MASK_FILE=\${BNAME}_masked.bed
214-
cat $bedfile \
215-
| bedtools intersect -v -a stdin -b $mask1 -f 0.25 \
216-
| bedtools intersect -v -a stdin -b $mask2 -f 0.25 > \$MASK_FILE
196+
BNAME=\$( echo $svfile | cut -d. -f1 )
197+
MASK_FILE=\${BNAME}_masked.vcf
198+
cat $svfile \
199+
| bedtools intersect -header -v -a stdin -b $mask1 -f 0.25 \
200+
| bedtools intersect -header -v -a stdin -b $mask2 -f 0.25 > \$MASK_FILE
217201
"""
218202
}
219203

220204

221205
// To make intersect files we need to combine them into one channel with
222-
// toList(). And also figure out if we have one or two files, therefore the
223-
// tap and count_beds.
224-
masked_beds.tap { count_beds_tmp }
225-
.tap { masked_beds }
226-
.toList().set { intersect_input }
227-
count_beds_tmp.count().set { count_beds }
206+
// toSortedList() (fermi is before manta in alphabet). And also figure out if we
207+
// have one or two files, therefore the tap and count_vcfs.
208+
masked_vcfs.tap { count_vcfs_tmp }
209+
.tap { masked_vcfs }
210+
.toSortedList().set { intersect_input }
211+
count_vcfs_tmp.count().set { count_vcfs }
228212

229213
process intersect_files {
230214
input:
231-
set file(bed1), file(bed2) from intersect_input
232-
val nbeds from count_beds
215+
set file(fermi_vcf), file(manta_vcf) from intersect_input
216+
val nvcfs from count_vcfs
233217
output:
234-
file "combined_masked.bed" into intersections
218+
file "combined_masked.vcf" into intersections
219+
file "combined_masked*.vcf"
235220

236221
publishDir params.outdir, mode: 'copy'
237222

@@ -243,33 +228,28 @@ process intersect_files {
243228
module 'bioinfo-tools'
244229
module "$params.modules.bedtools"
245230

246-
when: nbeds == 2
231+
when: nvcfs == 2
247232

248233
script:
249234
"""
250-
## In case grep doesn't find anything it will exit with non-zero exit
251-
## status, which will cause slurm to abort the job, we want to continue on
252-
## error here.
253-
set +e
254-
255-
## Create intersected bed files
235+
## Create intersected vcf files
256236
for WORD in DEL INS DUP; do
257-
intersectBed -a <( grep -w \$WORD $bed1 ) -b <( grep -w \$WORD $bed2 ) \
237+
intersectBed -a <( grep -w "^#.\\+\\|\$WORD" $fermi_vcf) \
238+
-b <( grep -w "^#.\\+\\|\$WORD" $manta_vcf) \
258239
-f 0.5 -r \
259-
| sort -k1,1V -k2,2n > combined_masked_\${WORD,,}.bed
240+
| sort -k1,1V -k2,2n > combined_masked_\${WORD,,}.vcf
260241
done
261242
262-
cat <( grep -v -w 'DEL\\|INS\\|DUP' $bed1 ) \
263-
<( grep -v -w 'DEL\\|INS\\|DUP' $bed2 ) \
264-
| sort -k1,1V -k2,2n > combined_masked_OTHER.bed
265-
266-
sort -k1,1V -k2,2n combined_masked_*.bed >> combined_masked.bed
243+
cat <( grep -v -w '^#.\\+\\|DEL\\|INS\\|DUP' $fermi_vcf ) \
244+
<( grep -v -w '^#.\\+\\|DEL\\|INS\\|DUP' $manta_vcf ) \
245+
| cut -f 1-8 \
246+
| sort -k1,1V -k2,2n > combined_masked_OTHER.vcf
267247
268-
set -e # Restore exit-settings
248+
sort -k1,1V -k2,2n combined_masked_*.vcf >> combined_masked.vcf
269249
"""
270250
}
271251

272-
annotate_files = intersections.flatten().mix( masked_beds.tap { masked_beds } )
252+
annotate_files = intersections.flatten().mix( masked_vcfs.tap { masked_vcfs } )
273253

274254
process variant_effect_predictor {
275255
input:
@@ -302,6 +282,12 @@ process variant_effect_predictor {
302282
exit 1;;
303283
esac
304284
285+
## If the input file is empty, just copy
286+
if [ \$( wc -l "\$infile" | awk '{print \$1}' ) -eq 0 ]; then
287+
cp "\$infile" "\$outfile"
288+
exit
289+
fi
290+
305291
variant_effect_predictor.pl \
306292
-i "\$infile" \
307293
--format "\$format" \
@@ -325,11 +311,10 @@ process variant_effect_predictor {
325311
"""
326312
}
327313

328-
process snpEff() {
314+
process snpEff {
329315
input:
330-
file vcf from vcfs_snpeff
316+
file vcf from annotate_files.tap { annotate_files }
331317
output:
332-
file '*snpeff_genes.txt'
333318
file '*.snpeff'
334319

335320
publishDir params.outdir, mode: 'copy'
@@ -363,10 +348,8 @@ process snpEff() {
363348
sed 's/ID=AD,Number=./ID=AD,Number=R/' "\$vcf" \
364349
| vt decompose -s - \
365350
| vt normalize -r $params.ref_fasta - \
366-
| java -Xmx7G -jar "\$SNPEFFJAR" -formatEff -classic GRCh37.75 \
351+
| java -Xmx7G -jar "\$snpeffjar" -formatEff -classic GRCh37.75 \
367352
> "\$vcf.snpeff"
368-
369-
cp snpEff_genes.txt "\$vcf.snpeff_genes.txt"
370353
"""
371354
}
372355

nextflow.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ params {
66
// Modules and their versions on the HPC-system
77
modules {
88
samtools = 'samtools/0.1.19'
9-
manta = 'manta/0.27.1'
9+
manta = 'manta/1.0.0'
1010
fermikit = 'fermikit/r178'
1111
vcftools = "vcftools/0.1.14"
1212
tabix = "tabix/0.2.6"
13-
bedtools = "BEDTools/2.23.0"
13+
bedtools = "BEDTools/2.25.0"
1414
vep = "vep/84"
1515
snpeff = "snpEff/4.2"
1616
}

0 commit comments

Comments
 (0)