Merge pull request #44 from NBISweden/feature/better_order

viklund · web-flow · commit 07439316d7f9 · 2016-09-21T10:49:24.000+02:00
Feature/better order
diff --git a/.gitignore b/.gitignore
@@ -1 +1,4 @@
 .nextflow.*
+results*
+slurm-*
+CEP-1-7*
diff --git a/main.nf b/main.nf
@@ -26,10 +26,8 @@ if (!params.project) {
     exit 1, 'You need to specify what project to run under, see --help for more information'
 }
 
-if (params.run_all) {
-    params.run_fermikit = true
-    params.run_manta = true
-}
+
+workflowSteps = processWorkflowSteps(params.steps)
 
 
 startup_message()
@@ -51,16 +49,11 @@ if (!bamindex) {
         module "$params.modules.samtools"
 
         // We only need one core for this part
-        if ( nextflow_running_as_slurmjob() ) {
-            executor 'local'
-        }
-        else {
-            executor 'slurm'
-            queue 'core'
-            time params.short_job
-        }
+        executor choose_executor()
+        queue 'core'
+        time params.short_job
 
-        when: params.run_manta == true
+        when: 'indexbam' in workflowSteps
 
         script:
         """
@@ -86,7 +79,7 @@ process manta {
     module 'bioinfo-tools'
     module "$params.modules.manta"
 
-    when: params.run_manta == true
+    when: 'manta' in workflowSteps
 
     script:
     """
@@ -135,16 +128,11 @@ if (!params.fastq) {
         module "$params.modules.samtools"
 
         // We only need one core for this part
-        if ( nextflow_running_as_slurmjob() ) {
-            executor 'local'
-        }
-        else {
-            executor 'slurm'
-            queue 'core'
-            time params.short_job
-        }
+        executor choose_executor()
+        queue 'core'
+        time params.short_job
 
-        when: params.run_fermikit == true
+        when: 'fastq' in workflowSteps
 
         script:
         """
@@ -172,7 +160,7 @@ process fermikit {
     module "$params.modules.vcftools"
     module "$params.modules.tabix"
 
-    when: params.run_fermikit == true
+    when: 'fermikit' in workflowSteps
 
     script:
     """
@@ -193,7 +181,6 @@ process fermikit {
 // Collect vcfs and beds into one channel
 beds = manta_bed.mix( fermi_bed )
 vcfs = manta_vcf.mix( fermi_vcf )
-                .tap { vcfs_snpeff }
 
 
 mask_files = [
@@ -210,12 +197,13 @@ process mask_beds {
         set file(bedfile), file(mask1), file(mask2) from mask_input
     output:
         file '*_masked.bed' into masked_beds
-        file '*_masked_*.bed'
 
     publishDir params.outdir, mode: 'copy'
 
     // Does not use many resources, run it locally
-    executor 'local'
+    executor choose_executor()
+    queue 'core'
+    time params.short_job
 
     module 'bioinfo-tools'
     module "$params.modules.bedtools"
@@ -226,27 +214,15 @@ process mask_beds {
     cat $bedfile \
         | bedtools intersect -v -a stdin -b $mask1 -f 0.25 \
         | bedtools intersect -v -a stdin -b $mask2 -f 0.25 > \$MASK_FILE
-
-
-    ## In case grep doesn't find anything it will exit with non-zero exit
-    ## status, which will cause slurm to abort the job, we want to continue on
-    ## error here.
-    set +e
-
-    ## Create filtered bed files
-    for WORD in DEL INS DUP; do
-        grep -w \$WORD \$MASK_FILE > \${BNAME}_masked_\${WORD,,}.bed
-    done
-
-    set -e # Restore exit-settings
     """
 }
 
+
 // To make intersect files we need to combine them into one channel with
 // toList(). And also figure out if we have one or two files, therefore the
 // tap and count_beds.
 masked_beds.tap { count_beds_tmp }
-           .tap { masked_beds_vep }
+           .tap { masked_beds }
            .toList().set { intersect_input }
 count_beds_tmp.count().set { count_beds }
 
@@ -255,12 +231,14 @@ process intersect_files {
         set file(bed1), file(bed2) from intersect_input
         val nbeds from count_beds
     output:
-        file "combined*.bed"
+        file "combined_masked.bed" into intersections
 
     publishDir params.outdir, mode: 'copy'
 
     // Does not use many resources, run it locally
-    executor 'local'
+    executor choose_executor()
+    queue 'core'
+    time params.short_job
 
     module 'bioinfo-tools'
     module "$params.modules.bedtools"
@@ -281,35 +259,35 @@ process intersect_files {
             | sort -k1,1V -k2,2n > combined_masked_\${WORD,,}.bed
     done
 
+    cat <( grep -v -w 'DEL\\|INS\\|DUP' $bed1 ) \
+        <( grep -v -w 'DEL\\|INS\\|DUP' $bed2 ) \
+        | sort -k1,1V -k2,2n > combined_masked_OTHER.bed
+
+    sort -k1,1V -k2,2n combined_masked_*.bed >> combined_masked.bed
+
     set -e # Restore exit-settings
     """
 }
 
+annotate_files = intersections.flatten().mix( masked_beds.tap { masked_beds } )
 
-vep_infiles = masked_beds_vep.mix(vcfs)
-
-// TODO: Figure out running characteristics
 process variant_effect_predictor {
     input:
-        file infile from vep_infiles
+        file infile from annotate_files.tap { annotate_files }
     output:
-        file '*.vep'
+        file '*.vep' into vep_outfiles
 
     publishDir params.outdir, mode: 'copy'
 
-    // We only need one core for this part
-    if ( nextflow_running_as_slurmjob() ) {
-        executor 'local'
-    }
-    else {
-        executor 'slurm'
-        queue 'core'
-        time params.short_job
-    }
+    executor choose_executor()
+    queue 'core'
+    time params.short_job
 
     module 'bioinfo-tools'
     module "$params.modules.vep"
 
+    when: 'vep' in workflowSteps
+
     script:
     """
     infile="$infile"
@@ -325,7 +303,7 @@ process variant_effect_predictor {
     esac
 
     variant_effect_predictor.pl \
-        -i "\$infile"               \
+        -i "\$infile"              \
         --format "\$format"        \
         -cache --dir "\$vep_cache" \
         -o "\$outfile"             \
@@ -347,7 +325,6 @@ process variant_effect_predictor {
     """
 }
 
-
 process snpEff() {
     input:
         file vcf from vcfs_snpeff
@@ -361,7 +338,11 @@ process snpEff() {
     module "$params.modules.snpeff"
 
     // Does not use many resources, run it locally
-    executor 'local'
+    executor choose_executor()
+    queue 'core'
+    time params.short_job
+
+    when: 'snpeff' in workflowSteps
 
     script:
     """
@@ -404,9 +385,9 @@ def usage_message() {
     log.info '  Optional'
     log.info '    --help          Show this message and exit'
     log.info '    --fastq         Input fastqfile (default is bam but with fq as fileending)'
-    log.info '    --run_manta     Run manta (default)'
-    log.info '    --run_fermikit  Run fermikit'
-    log.info '    --run_all       Run all callers'
+    log.info '    --steps         Specify what steps to run, comma separated:'
+    log.info '                Callers: manta, fermikit, cnvnator (choose one or many)'
+    log.info '                Annotation: vep OR snpeff'
     log.info '    --long_job      Running time for long job (callers, fermi and manta)'
     log.info '    --short_job     Running time for short jobs (bam indexing and bam2fq)'
     log.info '    --outdir        Directory where resultfiles are stored'
@@ -425,6 +406,7 @@ def startup_message() {
     log.info "Work dir   : $workDir"
     log.info "Output dir : $params.outdir"
     log.info "Project    : $params.project"
+    log.info "Will run   : " + workflowSteps.join(", ")
     log.info ""
 }
 
@@ -473,3 +455,59 @@ def nextflow_running_as_slurmjob() {
     }
     return false
 }
+
+//
+// Select the executor for the light-weight processes.
+//
+// Returns 'local' when nextflow_running_as_slurmjob() is true, else 'slurm'.
+//
+def choose_executor() {
+    return nextflow_running_as_slurmjob() ? 'local' : 'slurm'
+}
+
+//
+// Turn the value of --steps into the list of workflow steps to run.
+//
+// Names are trimmed and lower-cased, empty entries (as in 'manta,,vep') and
+// duplicates are dropped. 'indexbam' is appended when 'manta' is requested
+// and 'fastq' when 'fermikit' is requested.
+//
+// Exits with status 1 if steps is set but is not a string, or if both 'vep'
+// and 'snpeff' are requested.
+//
+// steps:   comma separated step names; a false or empty value selects nothing
+// returns: list of step names, empty when no steps were given
+//
+def processWorkflowSteps(steps) {
+    if ( ! steps ) {
+        return []
+    }
+
+    // A non-string value has no split(); stop with a readable message instead
+    // of a stack trace.
+    if ( ! (steps instanceof CharSequence) ) {
+        exit 1, 'You need to give --steps a comma separated list of steps, see --help for more information'
+    }
+
+    // Declared with def so the list stays local to this function; the caller
+    // stores the returned value itself.
+    def selected = steps.toString().split(',')
+                        .collect { it.trim().toLowerCase() }
+                        .findAll { it }
+                        .unique()
+
+    if ('vep' in selected && 'snpeff' in selected) {
+        exit 1, 'You can only run one annotator, either "vep" or "snpeff"'
+    }
+
+    // add() always appends; push() inserts at the front from Groovy 2.5 on.
+    if ('manta' in selected && !('indexbam' in selected)) {
+        selected.add( 'indexbam' )
+    }
+
+    if ('fermikit' in selected && !('fastq' in selected)) {
+        selected.add( 'fastq' )
+    }
+
+    return selected
+}
diff --git a/nextflow.config b/nextflow.config
@@ -1,4 +1,5 @@
 params {
+    steps = 'manta,vep' // Change on commandline --steps x,y,z
     project = "" // Set project or supply on commandline ( --project )
     outdir = "results"
 
@@ -26,8 +27,6 @@ params {
 
     long_job  = '10h' // used for the callers (fermikit & manta)
     short_job = '30m' // used for bam indexing and bam2fq
-
-    run_manta = true
 }
 
 process {