Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a084f6f
add dev.config for stimulus development stage related params
suzannejin Mar 24, 2025
f93e8d7
fix bug with params.container_dev
suzannejin Mar 25, 2025
ab3f607
add test_ibis conf
suzannejin Mar 25, 2025
bbf290f
modify tune to store optuna artifacts
suzannejin Mar 25, 2025
e89f920
add test_ibis to ci
suzannejin Mar 25, 2025
59c8fed
use downsampled data for test_ibis
suzannejin Mar 25, 2025
5979d62
lint
suzannejin Mar 26, 2025
f666c03
update split_csv subworkflow to run split csv on each data
suzannejin Mar 26, 2025
0f2cde3
improve naming from ch_sub_config to ch_config_split
suzannejin Mar 26, 2025
2e013a2
properly combine data and config in transform_csv subworkflow
suzannejin Mar 26, 2025
be7ae8d
update test_ibis_with_preprocessing.config
suzannejin Mar 26, 2025
ad90efa
Merge branch 'dev' into dev-config
bjlang Mar 26, 2025
7bc8b49
combine channels properly for tune subworkflow
suzannejin Mar 26, 2025
feac397
Merge branch 'dev-config' of https://github.com/nf-core/deepmodelopti…
suzannejin Mar 26, 2025
24ecc08
allow params.genome to use local files
Mar 26, 2025
04f4c94
fix error with ch_genome
Mar 26, 2025
23b0979
solve issue with not caching CHECK_MODEL
suzannejin Mar 26, 2025
b4edb5c
remove the empty element from check model out
suzannejin Mar 26, 2025
388a680
fix bug in tuning input channel parsing
suzannejin Mar 26, 2025
64553a1
add comment
suzannejin Mar 26, 2025
e9b0324
update test_ibis.config
suzannejin Mar 26, 2025
d7bc654
properly save all results from tune module
suzannejin Mar 26, 2025
66c6fbf
fix linting
suzannejin Mar 26, 2025
25fe7ba
[automated] Fix code linting
nf-core-bot Mar 26, 2025
4a475ba
fix small bug
suzannejin Mar 26, 2025
538982f
Merge branch 'dev-config' of https://github.com/nf-core/deepmodelopti…
suzannejin Mar 26, 2025
1e9d8e1
Update ci.ym
suzannejin Mar 27, 2025
f520da2
merge and resolve conflict
suzannejin Mar 27, 2025
3a32edd
fix bug
suzannejin Mar 27, 2025
7fd5ffd
add n_trial to meta
suzannejin Mar 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:
- "singularity"
test_name:
- "test"
- "test_ibis"
- "test_ibis_with_preprocessing"
- "test_noise_eval"
isMaster:
- ${{ github.base_ref == 'master' }}
Expand Down
7 changes: 7 additions & 0 deletions conf/dev.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
params {
config_profile_name = 'Development profile'
config_profile_description = 'Params needed during development'

// container
container_dev = "docker.io/mathysgrapotte/stimulus-py:dev"
}
43 changes: 32 additions & 11 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -90,62 +90,83 @@ process {
// main config
// ==============================================================================

withName: "STIMULUS_SPLIT_SPLIT" {
withName: "STIMULUS_SPLIT_TRANSFORM" {
publishDir = [
path: { "${params.outdir}/configs/${meta.id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

tag = { "${meta.id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_SPLIT_DATA" {
publishDir = [
enabled: false
]
ext.prefix = { "${meta.id}-split-${meta2.split_id}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}" }
tag = { "${meta.id} - split: ${meta.split_id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_TRANSFORM_CSV" {
publishDir = [
path: { "${params.outdir}/data/${meta.id}" },
path: { "${params.outdir}/transformed_data/${meta.id}" },
mode: params.publish_dir_mode,
when: params.save_data,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
ext.prefix = { "${meta.id}-${meta2.split_id}-${meta2.transform_id}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}-trans-${meta.transform_id}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "CHECK_MODEL" {
ext.args = { [
params.check_model_num_samples ? "-n ${params.check_model_num_samples}" : '',
params.debug_mode ? "--debug_mode" : ''
].flatten().unique(false).join(' ').trim()}

tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_TUNE" {
ext.args = { [
params.debug_mode ? "--debug_mode" : ''
].flatten().unique(false).join(' ').trim()}
publishDir = [
path: { "${params.outdir}/tune_results/${meta.id}/${meta.split_id}/${meta .transform_id}" },
path: { "${params.outdir}/tune_results/${meta.id}/${meta.split_id}/${meta.transform_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
ext.prefix = { "${meta.id}-${meta.split_id}-${meta.transform_id}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}-trans-${meta.transform_id}-rep-${meta.replicate}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_PREDICT" {
ext.prefix = { "${meta.id}-${meta.split_id}-${meta.transform_id}-${meta.replicate}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
ext.prefix = { "${meta.id}-split-${meta.split_id}-transf-${meta.transform_id}-rep-${meta.replicate}" }
tag = { "${meta.id} - split: ${meta.split_id} - transform: ${meta.transform_id} - replicate: ${meta.replicate}" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "STIMULUS_COMPARE_TENSORS_COSINE" {
ext.args = { "--mode cosine_similarity" }
if (params.container_dev) {
container = params.container_dev
}
}

withName: "CONCAT_COSINE" {
Expand Down
47 changes: 47 additions & 0 deletions conf/test_ibis.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run main.nf -profile test_ibis,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test ibis profile'
config_profile_description = 'Minimal ibis test dataset to check pipeline functions'

// Input data
data = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ibis_SP140/SP140_shade.stimulus.csv'
data_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/data.yaml'
model = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.py'
model_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.yaml'

// output
save_data = true
}

// Limit resources so that this can run on GitHub Actions
process {
maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
cpus = { 1 }
memory = { 4.GB * task.attempt }
time = { 10.m * task.attempt }
}
withLabel:process_medium {
cpus = { 2 }
memory = { 6.GB * task.attempt }
time = { 30.m * task.attempt }
}
withLabel:process_high {
cpus = { 4 }
memory = { 8.GB * task.attempt }
time = { 1.h * task.attempt }
}
}
50 changes: 50 additions & 0 deletions conf/test_ibis_with_preprocessing.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run main.nf -profile test_ibis_with_preprocessing,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test ibis profile - with preprocessing'
config_profile_description = 'Minimal ibis test dataset (with preprocessing) to check pipeline functions'

// Input data
data = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ibis_SP140/SP140_ghts.peaks'
data_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/data.yaml'
model = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.py'
model_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ConvBasic_withEfficientKAN.yaml'
preprocessing_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/ibis/ibis_SP140/preprocessing.yaml'
genome = 'GRCh38'
bed_peak_size = 40

// output
save_data = true
}

// Limit resources so that this can run on GitHub Actions
process {
maxRetries = params.max_retries
errorStrategy = params.err_start

withLabel:process_low {
cpus = { 1 }
memory = { 4.GB * task.attempt }
time = { 10.m * task.attempt }
}
withLabel:process_medium {
cpus = { 2 }
memory = { 6.GB * task.attempt }
time = { 30.m * task.attempt }
}
withLabel:process_high {
cpus = { 4 }
memory = { 8.GB * task.attempt }
time = { 1.h * task.attempt }
}
}
15 changes: 8 additions & 7 deletions modules/local/stimulus/tune/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ process STIMULUS_TUNE {
tuple val(meta2), path(model), path(model_config), path(initial_weights)

output:
tuple val(meta_complete), path(model), path("best_config.json"), path("${prefix}-best-model.safetensors") , emit: best_model
tuple val(meta_complete), path("${prefix}-best-optimizer.opt") , emit: optimizer
tuple val(meta_complete), path("TuneModel_*") , emit: tune_experiments, optional: true
// Now we need to output this one for the predict module - this will be have to be changed!
tuple val(meta), path(data_sub_config) , emit: data_config
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}-best-model.safetensors") , emit: model
tuple val(meta), path("${prefix}-best-optimizer.opt") , emit: optimizer
tuple val(meta), path("optuna_results/artifacts") , emit: artifacts
tuple val(meta), path("optuna_results/optuna_journal_storage.log"), emit: journal
path "versions.yml" , emit: versions
// now we need to output these in this format for the predict module - this will have to be changed!
tuple val(meta), path(model), path("best_config.json"), path("${prefix}-best-model.safetensors"), emit: model_tmp
tuple val(meta), path(data_sub_config) , emit: data_config_tmp

script:
prefix = task.ext.prefix ?: meta.id
def args = task.ext.args ?: ""
def use_initial_weights = initial_weights != [] ? "-w ${initial_weights}" : ""
meta_complete = meta2 + meta
"""
stimulus tune \
-d ${transformed_data} \
Expand Down
13 changes: 10 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ params {
shuffle = true // flag telling whether to shuffle the data and run a training on it; this sanity check always runs by default. (If our approach to shuffling changes, it may be better to remove this flag and make it a parameter of the user-given JSON for noise and split)
debug_mode = false // flag used to switch to debug mode for the pipeline.

// Development stage options
// please change them in dev.config
container_dev = null

// General
help = false
validate_params = true // tells whether or not to validate input values using nf-schema.
Expand Down Expand Up @@ -191,10 +195,13 @@ profiles {
]
}
}
test { includeConfig "conf/test.config" }
test_stub { includeConfig "conf/test_stub.config" }
test { includeConfig "conf/test.config" }
test_stub { includeConfig "conf/test_stub.config" }
test_ibis { includeConfig "conf/test_ibis.config" }
test_ibis_with_preprocessing { includeConfig "conf/test_ibis_with_preprocessing.config" }
test_noise_eval { includeConfig "conf/test_noise_eval.config" }
local { includeConfig "conf/local.config" }
local { includeConfig "conf/local.config" }
dev { includeConfig "conf/dev.config" }
}

// Load nf-core custom profiles from different Institutions
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@
"minimum": 0,
"help_text": "requesting the gpus for the tuning steps.",
"description": "set maximum GPU limit"
},
"container_dev": {
"type": "string",
"default": null,
"help_text": "Container for development stage.",
"description": "Container for development stage"
}
}
},
Expand Down
18 changes: 15 additions & 3 deletions subworkflows/local/split_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow SPLIT_CSV_WF {

take:
ch_data
ch_yaml_sub_config
ch_config_split

main:

Expand All @@ -26,9 +26,21 @@ workflow SPLIT_CSV_WF {
// Split csv data using stimulus
// ==============================================================================

// combine each data with each split config
ch_input = ch_data
.combine(ch_config_split)
.multiMap { meta_data, data, meta_config, config ->
def meta = meta_data + [split_id: meta_config.split_id]
data:
[meta, data]
config:
[meta, config]
}

// run stimulus split
STIMULUS_SPLIT_DATA(
ch_data,
ch_yaml_sub_config
ch_input.data,
ch_input.config
)
ch_split_data = STIMULUS_SPLIT_DATA.out.csv_with_split
ch_versions = ch_versions.mix(STIMULUS_SPLIT_DATA.out.versions)
Expand Down
34 changes: 21 additions & 13 deletions subworkflows/local/transform_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,40 @@ workflow TRANSFORM_CSV_WF {

take:
ch_split_data
ch_sub_config
ch_config_transform

main:

ch_versions = Channel.empty()


// TODO add strategy for handling the launch of stimulus noiser as well as NF-core and other modules
// TODO if the option is parellalization (for the above) then add csv column splitting noising merging
ch_versions = Channel.empty()
// modify the meta for the combining
ch_sub_config.map{
meta, yaml -> [ [id: meta.id, split_id: meta.split_id], meta, yaml]
}.set{ ch_sub_config }

// ==============================================================================
// Transform data using stimulus
// ==============================================================================

// combine all against all data vs configs
// combine data vs configs based on common key: split_id
ch_input = ch_split_data
.combine(ch_sub_config, by: 0)
.map{
meta_split, csv, meta, yaml -> [meta, yaml, csv]
.map { meta, data ->
[[split_id: meta.split_id], meta, data]
}
.multiMap{ meta, config, data ->
data: [meta, data]
config: [meta, config]
.combine(
ch_config_transform.map { meta, config ->
[[split_id: meta.split_id], meta, config]
}
,by: 0
)
.multiMap{ key, meta_data, data, meta_config, config ->
def meta = meta_data + [transform_id: meta_config.transform_id]
data:
[meta, data]
config:
[meta, config]
}

// run stimulus transform
STIMULUS_TRANSFORM_CSV(
ch_input.data,
ch_input.config
Expand Down
Loading