Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a084f6f
add dev.config for stimulus development stage related params
suzannejin Mar 24, 2025
f93e8d7
fix bug with params.container_dev
suzannejin Mar 25, 2025
ab3f607
add test_ibis conf
suzannejin Mar 25, 2025
bbf290f
modify tune to store optuna artifacts
suzannejin Mar 25, 2025
e89f920
add test_ibis to ci
suzannejin Mar 25, 2025
59c8fed
use downsampled data for test_ibis
suzannejin Mar 25, 2025
5979d62
lint
suzannejin Mar 26, 2025
f666c03
update split_csv subworkflow to run split csv on each data
suzannejin Mar 26, 2025
0f2cde3
improve naming from ch_sub_config to ch_config_split
suzannejin Mar 26, 2025
2e013a2
properly combine data and config in transform_csv subworkflow
suzannejin Mar 26, 2025
be7ae8d
update test_ibis_with_preprocessing.config
suzannejin Mar 26, 2025
ad90efa
Merge branch 'dev' into dev-config
bjlang Mar 26, 2025
7bc8b49
combine channels properly for tune subworkflow
suzannejin Mar 26, 2025
feac397
Merge branch 'dev-config' of https://github.com/nf-core/deepmodelopti…
suzannejin Mar 26, 2025
24ecc08
allow params.genome to use local files
Mar 26, 2025
04f4c94
fix error with ch_genome
Mar 26, 2025
23b0979
solve issue with not caching CHECK_MODEL
suzannejin Mar 26, 2025
b4edb5c
remove the empty element from check model out
suzannejin Mar 26, 2025
388a680
fix bug in tuning input channel parsing
suzannejin Mar 26, 2025
64553a1
add comment
suzannejin Mar 26, 2025
e9b0324
update test_ibis.config
suzannejin Mar 26, 2025
d7bc654
properly save all results from tune module
suzannejin Mar 26, 2025
66c6fbf
fix linting
suzannejin Mar 26, 2025
25fe7ba
[automated] Fix code linting
nf-core-bot Mar 26, 2025
4a475ba
fix small bug
suzannejin Mar 26, 2025
538982f
Merge branch 'dev-config' of https://github.com/nf-core/deepmodelopti…
suzannejin Mar 26, 2025
1e9d8e1
Update ci.ym
suzannejin Mar 27, 2025
f520da2
merge and resolve conflict
suzannejin Mar 27, 2025
3a32edd
fix bug
suzannejin Mar 27, 2025
7fd5ffd
add n_trial to meta
suzannejin Mar 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:
- "singularity"
test_name:
- "test"
- "test_ibis"
- "test_ibis_with_preprocessing"
- "test_noise_eval"
isMaster:
- ${{ github.base_ref == 'master' }}
Expand Down
7 changes: 7 additions & 0 deletions conf/dev.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
params {
config_profile_name = 'Development profile'
config_profile_description = 'Params needed during development'

// container
container_dev = "docker.io/mathysgrapotte/stimulus-py:dev"
}
43 changes: 32 additions & 11 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -90,62 +90,83 @@ process {
// main config
// ==============================================================================

withName: "STIMULUS_SPLIT_SPLIT" {
withName: "STIMULUS_SPLIT_TRANSFORM" {
publishDir = [
path: { "${params.outdir}/configs/${meta.id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

tag = { "${meta.id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_SPLIT_DATA" {
publishDir = [
enabled: false
]
ext.prefix = { "${meta.id}-split-${meta2.split_id}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}" }
tag = { "${meta.id} - split: ${meta.split_id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_TRANSFORM_CSV" {
publishDir = [
path: { "${params.outdir}/data/${meta.id}" },
path: { "${params.outdir}/transformed_data/${meta.id}" },
mode: params.publish_dir_mode,
when: params.save_data,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
ext.prefix = { "${meta.id}-${meta2.split_id}-${meta2.transform_id}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}-trans-${meta.transform_id}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "CHECK_MODEL" {
ext.args = { [
params.check_model_num_samples ? "-n ${params.check_model_num_samples}" : '',
params.debug_mode ? "--debug_mode" : ''
].flatten().unique(false).join(' ').trim()}

tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_TUNE" {
ext.args = { [
params.debug_mode ? "--debug_mode" : ''
].flatten().unique(false).join(' ').trim()}
publishDir = [
path: { "${params.outdir}/tune_results/${meta.id}/${meta.split_id}/${meta .transform_id}" },
path: { "${params.outdir}/tune_results/${meta.id}/${meta.split_id}/${meta.transform_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
ext.prefix = { "${meta.id}-${meta.split_id}-${meta.transform_id}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}-trans-${meta.transform_id}-rep-${meta.replicate}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_PREDICT" {
ext.prefix = { "${meta.id}-${meta.split_id}-${meta.transform_id}-${meta.replicate}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}-transf-${meta.transform_id}-rep-${meta.replicate}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_COMPARE_TENSORS_COSINE" {
ext.args = { "--mode cosine_similarity" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "CONCAT_COSINE" {
Expand Down
47 changes: 47 additions & 0 deletions conf/test_ibis.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run main.nf -profile test_ibis,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test ibis profile'
config_profile_description = 'Minimal ibis test dataset to check pipeline functions'

// Input data
data = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ibis_SP140/SP140_shade.stimulus.csv'
data_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/data.yaml'
model = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.py'
model_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.yaml'

// output
save_data = true
}

// Limit resources so that this can run on GitHub Actions
process {
maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
cpus = { 1 }
memory = { 4.GB * task.attempt }
time = { 10.m * task.attempt }
}
withLabel:process_medium {
cpus = { 2 }
memory = { 6.GB * task.attempt }
time = { 30.m * task.attempt }
}
withLabel:process_high {
cpus = { 4 }
memory = { 8.GB * task.attempt }
time = { 1.h * task.attempt }
}
}
50 changes: 50 additions & 0 deletions conf/test_ibis_with_preprocessing.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run main.nf -profile test_ibis_with_preprocessing,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test ibis profile - with preprocessing'
config_profile_description = 'Minimal ibis test dataset (with preprocessing) to check pipeline functions'

// Input data
data = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ibis_SP140/SP140_ghts.peaks'
data_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/data.yaml'
model = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.py'
model_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.yaml'
preprocessing_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ibis_SP140/preprocessing.yaml'
genome = 'GRCh38'
bed_peak_size = 40

// output
save_data = true
}

// Limit resources so that this can run on GitHub Actions
process {
maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
cpus = { 1 }
memory = { 4.GB * task.attempt }
time = { 10.m * task.attempt }
}
withLabel:process_medium {
cpus = { 2 }
memory = { 6.GB * task.attempt }
time = { 30.m * task.attempt }
}
withLabel:process_high {
cpus = { 4 }
memory = { 8.GB * task.attempt }
time = { 1.h * task.attempt }
}
}
15 changes: 8 additions & 7 deletions modules/local/stimulus/tune/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ process STIMULUS_TUNE {
tuple val(meta2), path(model), path(model_config), path(initial_weights)

output:
tuple val(meta_complete), path(model), path("best_config.json"), path("${prefix}-best-model.safetensors") , emit: best_model
tuple val(meta_complete), path("${prefix}-best-optimizer.opt") , emit: optimizer
tuple val(meta_complete), path("TuneModel_*") , emit: tune_experiments, optional: true
// Now we need to output this one for the predict module - this will be have to be changed!
tuple val(meta), path(data_sub_config) , emit: data_config
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}-best-model.safetensors") , emit: model
tuple val(meta), path("${prefix}-best-optimizer.opt") , emit: optimizer
tuple val(meta), path("optuna_results/artifacts") , emit: artifacts
tuple val(meta), path("optuna_results/optuna_journal_storage.log"), emit: journal
path "versions.yml" , emit: versions
// now we need to output these in this format for the predict module - this will have to be changed!
tuple val(meta), path(model), path("best_config.json"), path("${prefix}-best-model.safetensors"), emit: model_tmp
tuple val(meta), path(data_sub_config) , emit: data_config_tmp

script:
prefix = task.ext.prefix ?: meta.id
def args = task.ext.args ?: ""
def use_initial_weights = initial_weights != [] ? "-w ${initial_weights}" : ""
meta_complete = meta2 + meta
"""
stimulus tune \
-d ${transformed_data} \
Expand Down
13 changes: 10 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ params {
shuffle = true // flag telling whether to shuffle the data and run a training on it; this sanity check always runs by default. (If our approach to shuffling changes, it may be better to remove this flag and make it a parameter of the user-given JSON for noise and split)
debug_mode = false // flag used to switch to debug mode for the pipeline.

// Development stage options
// please change them in dev.config
container_dev = null

// General
help = false
validate_params = true // tells whether or not to validate input values using nf-schema.
Expand Down Expand Up @@ -191,10 +195,13 @@ profiles {
]
}
}
test { includeConfig "conf/test.config" }
test_stub { includeConfig "conf/test_stub.config" }
test { includeConfig "conf/test.config" }
test_stub { includeConfig "conf/test_stub.config" }
test_ibis { includeConfig "conf/test_ibis.config" }
test_ibis_with_preprocessing { includeConfig "conf/test_ibis_with_preprocessing.config" }
test_noise_eval { includeConfig "conf/test_noise_eval.config" }
local { includeConfig "conf/local.config" }
local { includeConfig "conf/local.config" }
dev { includeConfig "conf/dev.config" }
}

// Load nf-core custom profiles from different Institutions
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@
"minimum": 0,
"help_text": "requesting the gpus for the tuning steps.",
"description": "set maximum GPU limit"
},
"container_dev": {
"type": "string",
"default": null,
"help_text": "Container for development stage.",
"description": "Container for development stage"
}
}
},
Expand Down
18 changes: 15 additions & 3 deletions subworkflows/local/split_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow SPLIT_CSV_WF {

take:
ch_data
ch_yaml_sub_config
ch_config_split

main:

Expand All @@ -26,9 +26,21 @@ workflow SPLIT_CSV_WF {
// Split csv data using stimulus
// ==============================================================================

// combine each data with each split config
ch_input = ch_data
.combine(ch_config_split)
.multiMap { meta_data, data, meta_config, config ->
def meta = meta_data + [split_id: meta_config.split_id]
data:
[meta, data]
config:
[meta, config]
}

// run stimulus split
STIMULUS_SPLIT_DATA(
ch_data,
ch_yaml_sub_config
ch_input.data,
ch_input.config
)
ch_split_data = STIMULUS_SPLIT_DATA.out.csv_with_split
ch_versions = ch_versions.mix(STIMULUS_SPLIT_DATA.out.versions)
Expand Down
34 changes: 21 additions & 13 deletions subworkflows/local/transform_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,40 @@ workflow TRANSFORM_CSV_WF {

take:
ch_split_data
ch_sub_config
ch_config_transform

main:

ch_versions = Channel.empty()


// TODO add strategy for handling the launch of stimulus noiser as well as NF-core and other modules
// TODO if the option is parellalization (for the above) then add csv column splitting noising merging
ch_versions = Channel.empty()
// modify the meta for the combining
ch_sub_config.map{
meta, yaml -> [ [id: meta.id, split_id: meta.split_id], meta, yaml]
}.set{ ch_sub_config }

// ==============================================================================
// Transform data using stimulus
// ==============================================================================

// combine all against all data vs configs
// combine data vs configs based on common key: split_id
ch_input = ch_split_data
.combine(ch_sub_config, by: 0)
.map{
meta_split, csv, meta, yaml -> [meta, yaml, csv]
.map { meta, data ->
[[split_id: meta.split_id], meta, data]
}
.multiMap{ meta, config, data ->
data: [meta, data]
config: [meta, config]
.combine(
ch_config_transform.map { meta, config ->
[[split_id: meta.split_id], meta, config]
}
,by: 0
)
.multiMap{ key, meta_data, data, meta_config, config ->
def meta = meta_data + [transform_id: meta_config.transform_id]
data:
[meta, data]
config:
[meta, config]
}

// run stimulus transform
STIMULUS_TRANSFORM_CSV(
ch_input.data,
ch_input.config
Expand Down
Loading